From 615fe68889bbdd33ed63eac0a291fbd4dbb44d2f Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Tue, 9 Apr 2024 16:07:46 +0800 Subject: [PATCH 01/21] =?UTF-8?q?att-advisor=E4=B8=8Ema-advisor=E5=90=88?= =?UTF-8?q?=E4=B8=80=E6=A1=86=E6=9E=B6=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 + profiler/advisor/README.md | 41 -- profiler/advisor/__init__.py | 15 - profiler/advisor/advisor_backend/__init__.py | 14 - .../advisor/advisor_backend/advice_base.py | 50 -- .../advice_factory/__init__.py | 14 - .../advice_factory/advice_factory.py | 50 -- .../advice_factory/cluster_advice_factory.py | 38 -- .../advice_factory/compute_advice_factory.py | 34 - .../advice_factory/overall_advice_factory.py | 32 - .../advice_factory/timeline_advice_factory.py | 34 - .../cluster_advice/__init__.py | 14 - .../cluster_advice/cluster_advice_base.py | 67 -- .../cluster_advice/cluster_pipeline_advice.py | 437 ------------ .../cluster_advice/kernel_cluster_advice.py | 62 -- .../cluster_advice/slow_link_advice.py | 110 --- .../cluster_advice/slow_rank_advice.py | 71 -- .../common_func_advisor/__init__.py | 14 - .../common_func_advisor/constant.py | 225 ------- .../common_func_advisor/trace_view_json.py | 209 ------ .../trace_view_preprocessor.py | 208 ------ .../compute_advice/__init__.py | 14 - .../compute_advice/compute_advice_base.py | 105 --- .../compute_advice/npu_fused/__init__.py | 14 - .../compute_advice/npu_fused/csv_analyzer.py | 81 --- .../compute_advice/npu_fused/json_analyzer.py | 55 -- .../compute_advice/npu_fused/op_perf.py | 196 ------ .../compute_advice/npu_fused_advice.py | 71 -- .../compute_advice/npu_slow_advice.py | 82 --- profiler/advisor/advisor_backend/interface.py | 62 -- .../overall_advice/overall_summary_advice.py | 174 ----- .../prof_bean_advisor/__init__.py | 14 - .../cluster_step_trace_time_bean.py | 67 -- .../timeline_advice/__init__.py | 14 - .../timeline_advice/op_schedule_advice.py | 
89 --- .../timeline_advice/optimizer_advice.py | 55 -- .../timeline_advice/timeline_advice_base.py | 99 --- .../overall_advice => analyzer}/__init__.py | 0 profiler/advisor/analyzer/base_analyzer.py | 16 + .../analyzer/communication/__init__.py | 0 .../communication/bandwidth/__init__.py | 0 .../communication/environment/__init__.py | 0 .../advisor/analyzer/computing/__init__.py | 0 .../analyzer/computing/aicpu/__init__.py | 0 .../analyzer/computing/bound/__init__.py | 0 .../analyzer/computing/op_compile/__init__.py | 0 .../advisor/analyzer/dataloader/__init__.py | 0 profiler/advisor/analyzer/overall/__init__.py | 0 .../advisor/analyzer/scheduling/__init__.py | 0 .../scheduling/free_event/__init__.py | 0 .../scheduling/fusion_ops/__init__.py | 0 profiler/advisor/cluster_perf_analysis.ipynb | 625 ------------------ profiler/advisor/common/__init__.py | 0 profiler/advisor/common/constant.py | 106 +++ profiler/advisor/common/module_lib.py | 87 +++ profiler/advisor/common/timeline/__init__.py | 0 profiler/advisor/common/timeline/event.py | 23 + .../advisor/common/timeline/fusion_ops_db.py | 555 ++++++++++++++++ profiler/advisor/common/version_control.py | 26 + profiler/advisor/compute_perf_analysis.ipynb | 366 ---------- profiler/advisor/config/__init__.py | 0 profiler/advisor/config/config.ini | 16 + profiler/advisor/config/config.py | 103 +++ profiler/advisor/dataset/__init__.py | 6 + profiler/advisor/display/__init__.py | 0 profiler/advisor/display/html/__init__.py | 0 profiler/advisor/display/html/render.py | 44 ++ .../display/html/templates/affinity_api.html | 50 ++ .../advisor/display/html/templates/main.html | 202 ++++++ .../html/templates/overall_analysis.html | 15 + profiler/advisor/img/advisor_result.PNG | Bin 53557 -> 0 bytes profiler/advisor/img/jupyter_report.PNG | Bin 34097 -> 0 bytes profiler/advisor/interface/__init__.py | 0 profiler/advisor/interface/interface.py | 67 ++ profiler/advisor/overall_perf_analysis.ipynb | 323 --------- 
profiler/advisor/result/__init__.py | 0 profiler/advisor/result/item.py | 61 ++ profiler/advisor/result/result.py | 201 ++++++ profiler/advisor/rules/__init__.py | 0 .../advisor/rules/timeline_fusion_ops.yaml | 59 ++ profiler/advisor/timeline_perf_analysis.ipynb | 163 ----- profiler/advisor/utils/__init__.py | 0 profiler/advisor/utils/log.py | 63 ++ profiler/advisor/utils/tools.py | 76 +++ profiler/advisor/utils/utils.py | 499 ++++++++++++++ profiler/advisor/version.py | 38 ++ 86 files changed, 2319 insertions(+), 4408 deletions(-) delete mode 100644 profiler/advisor/README.md delete mode 100644 profiler/advisor/advisor_backend/__init__.py delete mode 100644 profiler/advisor/advisor_backend/advice_base.py delete mode 100644 profiler/advisor/advisor_backend/advice_factory/__init__.py delete mode 100644 profiler/advisor/advisor_backend/advice_factory/advice_factory.py delete mode 100644 profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py delete mode 100644 profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py delete mode 100644 profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py delete mode 100644 profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py delete mode 100644 profiler/advisor/advisor_backend/cluster_advice/__init__.py delete mode 100644 profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py delete mode 100644 profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py delete mode 100644 profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py delete mode 100644 profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py delete mode 100644 profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py delete mode 100644 profiler/advisor/advisor_backend/common_func_advisor/__init__.py delete mode 100644 profiler/advisor/advisor_backend/common_func_advisor/constant.py delete mode 100644 
profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py delete mode 100644 profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/__init__.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py delete mode 100644 profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py delete mode 100644 profiler/advisor/advisor_backend/interface.py delete mode 100644 profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py delete mode 100644 profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py delete mode 100644 profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py delete mode 100644 profiler/advisor/advisor_backend/timeline_advice/__init__.py delete mode 100644 profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py delete mode 100644 profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py delete mode 100644 profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py rename profiler/advisor/{advisor_backend/overall_advice => analyzer}/__init__.py (100%) create mode 100644 profiler/advisor/analyzer/base_analyzer.py create mode 100644 profiler/advisor/analyzer/communication/__init__.py create mode 100644 profiler/advisor/analyzer/communication/bandwidth/__init__.py create mode 100644 profiler/advisor/analyzer/communication/environment/__init__.py create mode 100644 
profiler/advisor/analyzer/computing/__init__.py create mode 100644 profiler/advisor/analyzer/computing/aicpu/__init__.py create mode 100644 profiler/advisor/analyzer/computing/bound/__init__.py create mode 100644 profiler/advisor/analyzer/computing/op_compile/__init__.py create mode 100644 profiler/advisor/analyzer/dataloader/__init__.py create mode 100644 profiler/advisor/analyzer/overall/__init__.py create mode 100644 profiler/advisor/analyzer/scheduling/__init__.py create mode 100644 profiler/advisor/analyzer/scheduling/free_event/__init__.py create mode 100644 profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py delete mode 100644 profiler/advisor/cluster_perf_analysis.ipynb create mode 100644 profiler/advisor/common/__init__.py create mode 100644 profiler/advisor/common/constant.py create mode 100644 profiler/advisor/common/module_lib.py create mode 100644 profiler/advisor/common/timeline/__init__.py create mode 100644 profiler/advisor/common/timeline/event.py create mode 100644 profiler/advisor/common/timeline/fusion_ops_db.py create mode 100644 profiler/advisor/common/version_control.py delete mode 100644 profiler/advisor/compute_perf_analysis.ipynb create mode 100644 profiler/advisor/config/__init__.py create mode 100644 profiler/advisor/config/config.ini create mode 100644 profiler/advisor/config/config.py create mode 100644 profiler/advisor/dataset/__init__.py create mode 100644 profiler/advisor/display/__init__.py create mode 100644 profiler/advisor/display/html/__init__.py create mode 100644 profiler/advisor/display/html/render.py create mode 100644 profiler/advisor/display/html/templates/affinity_api.html create mode 100644 profiler/advisor/display/html/templates/main.html create mode 100644 profiler/advisor/display/html/templates/overall_analysis.html delete mode 100644 profiler/advisor/img/advisor_result.PNG delete mode 100644 profiler/advisor/img/jupyter_report.PNG create mode 100644 profiler/advisor/interface/__init__.py create mode 100644 
profiler/advisor/interface/interface.py delete mode 100644 profiler/advisor/overall_perf_analysis.ipynb create mode 100644 profiler/advisor/result/__init__.py create mode 100644 profiler/advisor/result/item.py create mode 100644 profiler/advisor/result/result.py create mode 100644 profiler/advisor/rules/__init__.py create mode 100644 profiler/advisor/rules/timeline_fusion_ops.yaml delete mode 100644 profiler/advisor/timeline_perf_analysis.ipynb create mode 100644 profiler/advisor/utils/__init__.py create mode 100644 profiler/advisor/utils/log.py create mode 100644 profiler/advisor/utils/tools.py create mode 100644 profiler/advisor/utils/utils.py create mode 100644 profiler/advisor/version.py diff --git a/.gitignore b/.gitignore index a81c8ee121..36aacc7241 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,9 @@ dmypy.json # Cython debug symbols cython_debug/ + +# advisor analysis output +att_advisor*.html +*.xlsx +operator_tuning_file*.cfg +.ipynb_checkpoints/ \ No newline at end of file diff --git a/profiler/advisor/README.md b/profiler/advisor/README.md deleted file mode 100644 index 722243cdc2..0000000000 --- a/profiler/advisor/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# 性能分析工具 - -性能分析工具是将Ascend PyTorch Profiler采集的性能数据进行分析,并输出性能调优建议的工具 。使用方式如下: - -下列以Windows环境下执行为例介绍。 - -1. 在环境下安装jupyter notebook工具。 - - ```bash - pip install jupyter notebook - ``` - - jupyter notebook工具的具体安装和使用指导请至jupyter notebook工具官网查找。 - -2. 在环境下安装ATT工具。 - - ``` - git clone https://gitee.com/ascend/att.git - ``` - - 安装环境下保存Ascend PyTorch Profiler采集的性能数据。 - -3. 进入att\profiler\advisor目录执行如下命令启动jupyter notebook工具。 - - ```bash - jupyter notebook - ``` - - 执行成功则自动启动浏览器读取att\profiler\advisor目录,如下示例: - - ![jupyter_report](img/jupyter_report.PNG) - - 若在Linux环境下则回显打印URL地址,即是打开jupyter notebook工具页面的地址,需要复制URL,并使用浏览器访问(若为远端服务器则需要将域名“**localhost**”替换为远端服务器的IP),进入jupyter notebook工具页面。 - -4. 
每个.ipynb文件为一项性能数据分析任务,选择需要的.ipynb打开,并在*_path参数下拷贝保存Ascend PyTorch Profiler采集的性能数据的路径。如下示例: - - ![advisor_result](img/advisor_result.PNG) - -5. 单击运行按钮执行性能数据分析。 - - 分析结果详细内容会在.ipynb页面下展示。 diff --git a/profiler/advisor/__init__.py b/profiler/advisor/__init__.py index 0428ee03f0..e69de29bb2 100644 --- a/profiler/advisor/__init__.py +++ b/profiler/advisor/__init__.py @@ -1,15 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - diff --git a/profiler/advisor/advisor_backend/__init__.py b/profiler/advisor/advisor_backend/__init__.py deleted file mode 100644 index a0e9f748f4..0000000000 --- a/profiler/advisor/advisor_backend/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
\ No newline at end of file diff --git a/profiler/advisor/advisor_backend/advice_base.py b/profiler/advisor/advisor_backend/advice_base.py deleted file mode 100644 index 35939bcea9..0000000000 --- a/profiler/advisor/advisor_backend/advice_base.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from abc import abstractmethod - - -class AdviceBase: - DATA = "data" - BOTTLENECK = "bottleneck" - ADVICE = "advice" - - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - self.bottelneck = '' - self.output_format_data = { - self.DATA: [], - self.BOTTLENECK: '', - self.ADVICE: '' - } - - @abstractmethod - def path_check(self): - """ - check whether input path is valid - """ - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/advice_factory/__init__.py b/profiler/advisor/advisor_backend/advice_factory/__init__.py deleted file mode 100644 index a0e9f748f4..0000000000 --- a/profiler/advisor/advisor_backend/advice_factory/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/advice_factory/advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/advice_factory.py deleted file mode 100644 index 639f4800cf..0000000000 --- a/profiler/advisor/advisor_backend/advice_factory/advice_factory.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os - -from common_func.path_manager import PathManager - - -class AdviceFactory: - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - - @staticmethod - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - - def produce_advice(self, advice: str, kwargs: dict): - """ - produce data for input mode and advice - """ - self.path_check() - self.advice_check(advice) - return self.run_advice(advice, kwargs) - - def path_check(self): - """ - check whether input path is valid - """ - PathManager.input_path_common_check(self.collection_path) - - def advice_check(self, advice: str): - """ - check whether input advice is valid - """ - if advice not in self.ADVICE_LIB.keys(): - msg = '[ERROR]Input advice is illegal.' - raise RuntimeError(msg) diff --git a/profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py deleted file mode 100644 index 6bb93f4670..0000000000 --- a/profiler/advisor/advisor_backend/advice_factory/cluster_advice_factory.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from advice_factory.advice_factory import AdviceFactory -from cluster_advice.slow_link_advice import SlowLinkAdvice -from cluster_advice.slow_rank_advice import SlowRankAdvice -from cluster_advice.cluster_pipeline_advice import ClusterPipelineAdvice -from cluster_advice.kernel_cluster_advice import KernelClusterAdvice -from common_func_advisor.constant import Constant - - -class ClusterAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.SLOW_RANK: SlowRankAdvice, - Constant.SLOW_LINK: SlowLinkAdvice, - Constant.PIPELINE: ClusterPipelineAdvice, - Constant.KERNEL: KernelClusterAdvice - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path, kwargs).run() diff --git a/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py deleted file mode 100644 index 336bef7dd8..0000000000 --- a/profiler/advisor/advisor_backend/advice_factory/compute_advice_factory.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from common_func_advisor.constant import Constant -from advice_factory.advice_factory import AdviceFactory -from compute_advice.npu_fused_advice import NpuFusedAdvice -from compute_advice.npu_slow_advice import NpuSlowAdvice - - -class ComputeAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.NPU_FUSED: NpuFusedAdvice, - Constant.NPU_SLOW: NpuSlowAdvice, - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path).run() diff --git a/profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py deleted file mode 100644 index baf80cc200..0000000000 --- a/profiler/advisor/advisor_backend/advice_factory/overall_advice_factory.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from advice_factory.advice_factory import AdviceFactory -from common_func_advisor.constant import Constant -from overall_advice.overall_summary_advice import OverallSummaryAdvice - - -class OverallAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.SUMMARY: OverallSummaryAdvice - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path, kwargs).run() diff --git a/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py deleted file mode 100644 index 44b352e95a..0000000000 --- a/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from advice_factory.advice_factory import AdviceFactory -from common_func_advisor.constant import Constant -from timeline_advice.optimizer_advice import OptimizerAdvice -from timeline_advice.op_schedule_advice import OpScheduleAdvice - - -class TimelineAdviceFactory(AdviceFactory): - ADVICE_LIB = { - Constant.OPTIM: OptimizerAdvice, - Constant.OP_SCHE: OpScheduleAdvice, - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - - def run_advice(self, advice: str, kwargs: dict): - """ - run advice to produce data - """ - return self.ADVICE_LIB.get(advice)(self.collection_path).run() diff --git a/profiler/advisor/advisor_backend/cluster_advice/__init__.py b/profiler/advisor/advisor_backend/cluster_advice/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py b/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py deleted file mode 100644 index e9be467596..0000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/cluster_advice_base.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from abc import abstractmethod -from common_func.constant import Constant -from advice_base import AdviceBase -from cluster_analysis import Interface - - -class ClusterAdviceBase(AdviceBase): - def __init__(self, collection_path: str): - super().__init__(collection_path) - - @staticmethod - def compute_max_gap_ratio(data: list, mean: float): - if mean == 0: - return 0 - else: - return (max(data) - min(data)) / mean - - def path_check(self): - """ - check whether input path is valid - """ - for file in os.listdir(self.collection_path): - if file == 'cluster_analysis_output': - print("[INFO]Cluster has been analyzed " - "because of the existence of cluster analysis output directory.") - print("[INFO]Skip Cluster analyze backend.") - return - print("[INFO] cluster analysis is in the process, please wait...") - self.cluster_analyze() - - def cluster_analyze(self): - parameter = { - Constant.COLLECTION_PATH: self.collection_path, - Constant.ANALYSIS_MODE: "all" - } - try: - Interface(parameter).run() - except Exception as e: - raise ValueError(f"Cluster analyze backend failed:{e}") from e - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py 
b/profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py deleted file mode 100644 index 7f8846f1d9..0000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/cluster_pipeline_advice.py +++ /dev/null @@ -1,437 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import multiprocessing -from typing import Dict -from typing import Optional -from typing import Deque -from typing import List -from typing import Tuple -from collections import defaultdict -from collections import deque -from decimal import Decimal -from dataclasses import dataclass - -from common_func.file_manager import FileManager -from common_func_advisor.constant import Constant -from common_func_advisor.trace_view_preprocessor import FineTraceViewData -from common_func_advisor.trace_view_preprocessor import TraceViewPreProcessor -from cluster_advice.cluster_advice_base import ClusterAdviceBase -from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor - - -@dataclass -class PipelineTimeSlice: - start: str = "" - end: str = "" - slice_type: str = "" - bp_timeslice: list = None - - def __post_init__(self): - self.bp_timeslice = self.bp_timeslice or [] - - -class PipelineTraceViewer: - STAGE_COLOR = "good" - BUBBLE_COLOR = "generic_work" - FP_COLOR = "good" - BP_COLOR = "bad" - PIPLINE_VIEW = "Pipeline View" - STAGE = "Stage" - BUBBLE = "Bubble" - 
FP = "FP" - BP = "BP" - - COLORS = { - STAGE: STAGE_COLOR, - BUBBLE: BUBBLE_COLOR, - FP: FP_COLOR, - BP: BP_COLOR - } - - def _gen_trace_pair(self, name: str, start_ts: str, end_ts: str, pid: str, tid: str) -> Dict: - data = { - Constant.OP_NAME: name, - Constant.CNAME: self.COLORS.get(name, self.BUBBLE), - Constant.PH: Constant.PH_X, - Constant.PID: pid, - Constant.OP_TID: tid, - Constant.TS: start_ts, - Constant.DUR: str(Decimal(end_ts) - Decimal(start_ts)) - } - - return data - - def gen_stage_bubble_trace_data(self, rank_id: int, timeslice_list: List[PipelineTimeSlice]) -> List[Dict]: - """ - generate stage bubble trace json data - """ - rank_str = f'Rank {rank_id}' - trace_data = [] - - for timeslice in timeslice_list: - data = self._gen_trace_pair(timeslice.slice_type, timeslice.start, - timeslice.end, self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - - return trace_data - - def gen_fp_bp_trace_data(self, rank_id: int, timeslice_list: List[PipelineTimeSlice]) -> List[Dict]: - """ - generate fp bp trace json data - """ - rank_str = f'Rank {rank_id}' - trace_data = [] - - for timeslice in timeslice_list: - if timeslice.slice_type == self.BUBBLE: - data = self._gen_trace_pair(timeslice.slice_type, timeslice.start, - timeslice.end, self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - else: - last_end = timeslice.start - for bp_bound in timeslice.bp_timeslice: - data = self._gen_trace_pair(self.FP, last_end, - bp_bound[0], self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - last_end = bp_bound[1] - - data = self._gen_trace_pair(self.BP, bp_bound[0], - bp_bound[1], self.PIPLINE_VIEW, rank_str) - trace_data.append(data) - - last_data = self._gen_trace_pair(self.FP, last_end, - timeslice.end, self.PIPLINE_VIEW, rank_str) - trace_data.append(last_data) - - return trace_data - - -class ClusterPipelineAdvice(ClusterAdviceBase): - BUBBLE = "Bubble" - STAGE = "Stage" - PIPELINE_VIEW = "Pipeline View" - SAVE_JSON = "pipeline_view.json" - - def 
__init__(self, collection_path: str, kwargs: dict): - super().__init__(collection_path) - self.rank_ids = list(set(kwargs.get("rank_ids", []))) - self.worker_num = kwargs.get("worker_num", int(multiprocessing.cpu_count() / 2)) - self.rank_prof_dirs = {} - self.cur_data = [] - self.cur_bottleneck = {} - self.cur_advices = "" - - def run(self) -> dict: - """ - Unified entrance interface - """ - self.rank_prof_dirs = self.get_rank_prof_dirs(self.rank_ids) - if not self.rank_prof_dirs: - print("[ERROR] No rank profiling data found, please check the rank ids or dir path.") - return {} - - self.process() - self.output() - self.identify_bottleneck() - return self.output_format_data - - def process(self) -> None: - """ - process all rank profiling data by using multi-process - """ - start_time = time.time() - print(f"[INFO] Start to process {len(self.rank_prof_dirs)} rank profiling data with {self.worker_num} workers.") - with multiprocessing.Pool(self.worker_num) as pool: - results = pool.map(self.work, self.rank_prof_dirs.items()) - - for (rank_id, _), (res, show_fp_bp) in zip(self.rank_prof_dirs.items(), results): - if show_fp_bp: - self.cur_data += PipelineTraceViewer().gen_fp_bp_trace_data(rank_id, res) - else: - self.cur_data += PipelineTraceViewer().gen_stage_bubble_trace_data(rank_id, res) - print(f"[INFO] Pipline view data process finished, cost {time.time() - start_time:.2f}s.") - - @staticmethod - def _align_trace_bound(results: List) -> None: - """ - align all rank trace bound for better visualization - """ - start_list, end_list = [], [] - for res in results: - start_list.append(res[0].start) - end_list.append(res[-1].end) - - # update all rank trace bound - for res in results: - res[0].start = min(start_list) - res[-1].end = max(end_list) - - def work(self, kv: Tuple[int, str]) -> Tuple[List[PipelineTimeSlice], bool]: - """ - single process worker function - """ - show_fp_bp = False - rank_id, rank_prof_dir = kv - print(f"[INFO] [Rank {rank_id}] Start to 
process rank profiling data.") - json_path = os.path.join(rank_prof_dir, Constant.ASCEND_PROFILER_OUTPUT, Constant.TRACE_VIEW_JSON) - fine_data = self.load_trace_view_data(json_path) - if not fine_data.hcom_ops or not fine_data.hcom_tids: - print(f"[ERROR] [Rank {rank_id}] No hcom send recv ops found, make sure the trace view data is pipeline " - f"parallel sense.") - return [], show_fp_bp - - timeslice_list = self.get_pipeline_timeslice(fine_data.hcom_ops, fine_data.hcom_tids, fine_data.min_ts, - fine_data.max_ts) - if not fine_data.fp_ops or not fine_data.bp_ops: - print(f"[INFO] [Rank {rank_id}] No frameWork data in trace view, only show stage and bubble.") - elif len(fine_data.hcom_tids) > 1: - print(f"[WARN] [Rank {rank_id}] More than one hcom tid found, only show stage and bubble.") - else: - print(f"[INFO] [Rank {rank_id}] Found frameWork data in trace view, show fp bp and bubble.") - bp_ops = self.get_fp_bp_bound_ops(fine_data) - self.update_stage_fp_bp(timeslice_list, bp_ops) - show_fp_bp = True - print(f"[INFO] [Rank {rank_id}] Rank profiling data process finished.") - - return timeslice_list, show_fp_bp - - def identify_bottleneck(self) -> None: - pass - - def output(self) -> None: - """ - output result - """ - self.cur_data.append( - { - Constant.OP_NAME: Constant.PROCESS_NAME, - Constant.PH: Constant.PH_META, - Constant.PID: self.PIPELINE_VIEW, - Constant.OP_TID: self.PIPELINE_VIEW, - Constant.ARGS: { - Constant.OP_NAME: self.PIPELINE_VIEW - } - } - ) - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - self.output_format_data[self.ADVICE] = self.cur_advices - - def get_rank_prof_dirs(self, rank_ids: list) -> Dict[int, str]: - """ - get rank profiling directories by rank ids - """ - rank_prof_dirs = defaultdict(str) - prof_dirs = [] - for prof_dir in os.listdir(self.collection_path): - if prof_dir.endswith(Constant.PT_PROF_SUFFIX): - prof_dirs.append(os.path.join(self.collection_path, 
prof_dir)) - - data_map = PytorchDataPreprocessor(prof_dirs).get_data_map() - for rank_id in rank_ids: - if rank_id in data_map: - rank_prof_dirs[rank_id] = data_map[rank_id] - else: - print(f'[Warning] Rank {rank_id} not found in {self.collection_path}') - - return rank_prof_dirs - - @staticmethod - def load_trace_view_data(json_path) -> Optional[FineTraceViewData]: - """ - load trace view data from json file and preprocess - """ - raw_data = FileManager.read_json_file(json_path) - return TraceViewPreProcessor().process(raw_data) - - @staticmethod - def double_queue_pop(fp_que: Deque[dict], bp_que: Deque[dict]) -> Tuple[list, list]: - """ - double queue (fp and bp que) pop alternating algorithm implementation - """ - res_fp_ops, res_bp_ops = [], [] - pop_fp = fp_que[0][Constant.TS] < bp_que[0][Constant.TS] - fp_start_op, fp_end_op = fp_que[0], fp_que[0] - bp_start_op, bp_end_op = bp_que[0], bp_que[0] - - def update_bound_op(que: Deque[dict], start_op: dict, end_op: dict) -> Tuple[dict, dict]: - """ - update fp and bp bound op - """ - op = que.popleft() - op_s = Decimal(op[Constant.TS]) - op_e = op_s + Decimal(op[Constant.DUR]) - - start_op = op if op_s < Decimal(start_op[Constant.TS]) else start_op - end_op = op if op_e > Decimal(end_op[Constant.TS]) + Decimal(end_op[Constant.DUR]) else end_op - - return start_op, end_op - - while fp_que and bp_que: - if pop_fp: - if len(fp_que) > 1 and bp_que and fp_que[1][Constant.TS] > bp_que[0][Constant.TS]: - pop_fp = False # pop bp que - if len(fp_que) == 1: - pop_fp = False # pop bp que - - fp_start_op, fp_end_op = update_bound_op(fp_que, fp_start_op, fp_end_op) - - # time to pop bp que, need to record fp ops and update bp start op - if not pop_fp: - res_fp_ops.append((fp_start_op, fp_end_op)) - if fp_que: - bp_start_op, bp_end_op = bp_que[0], bp_que[0] - else: - if len(bp_que) > 1 and fp_que and bp_que[1][Constant.TS] > fp_que[0][Constant.TS]: - pop_fp = True # pop fp que - if len(bp_que) == 1: - pop_fp = True # pop fp que 
- - bp_start_op, bp_end_op = update_bound_op(bp_que, bp_start_op, bp_end_op) - - # time to pop fp que, need to record bp ops and update fp start op - if pop_fp: - res_bp_ops.append((bp_start_op, bp_end_op)) - if bp_que: - fp_start_op, fp_end_op = fp_que[0], fp_que[0] - - if fp_que: - fp_start_op, fp_end_op = fp_que[0], fp_que[0] - while fp_que: - fp_start_op, fp_end_op = update_bound_op(fp_que, fp_start_op, fp_end_op) - res_fp_ops.append((fp_start_op, fp_end_op)) - - if bp_que: - bp_start_op, bp_end_op = bp_que[0], bp_que[0] - while bp_que: - bp_start_op, bp_end_op = update_bound_op(bp_que, bp_start_op, bp_end_op) - res_bp_ops.append((bp_start_op, bp_end_op)) - - return res_fp_ops, res_bp_ops - - @staticmethod - def update_ops_time(ops_list: List[List[dict]], torch_to_npu_links: List[dict], - npu_ops_ts_dur: dict) -> List[List[dict]]: - """ - update fp and bp bound ops time at device by using torch_to_npu_links - """ - ops_que = deque(ops_list) - torch_to_npu_que = deque(torch_to_npu_links) - res = [] - link_stack = [] - while ops_que and torch_to_npu_que: - link = torch_to_npu_que.popleft() - link_s = Decimal(link[Constant.TS]) - - # bound op at framework level - cpu_op_l, cpu_op_r = ops_que[0][0], ops_que[0][1] - cpu_op_s = Decimal(cpu_op_l[Constant.TS]) - cpu_op_e = Decimal(cpu_op_r[Constant.TS]) + Decimal(cpu_op_r[Constant.DUR]) - - if cpu_op_s < link_s < cpu_op_e: - link_stack.append(link) - if link_s > cpu_op_e or \ - (link_stack and not torch_to_npu_que): - min_link = link_stack[0] - max_link = link_stack[-1] - - min_link_s = str(min_link[Constant.ID]) - max_link_s = str(max_link[Constant.ID]) - # for compatibility with old data (ts is float type) - if isinstance(min_link[Constant.ID], float): - cpu_op_l["npu_op_ts"] = min_link_s - cpu_op_r["npu_op_ts"] = max_link_s - else: - cpu_op_l["npu_op_ts"] = f"{min_link_s[:-3]}.{min_link_s[-3:]}" - cpu_op_r["npu_op_ts"] = f"{max_link_s[:-3]}.{max_link_s[-3:]}" - cpu_op_l["npu_op_dur"] = 
npu_ops_ts_dur.get(cpu_op_l["npu_op_ts"], 0) - cpu_op_r["npu_op_dur"] = npu_ops_ts_dur.get(cpu_op_r["npu_op_ts"], 0) - - res.append([cpu_op_l, cpu_op_r]) - ops_que.popleft() - link_stack.clear() - - return res - - def get_fp_bp_bound_ops(self, fine_data: FineTraceViewData) -> List[List[dict]]: - """ - get fp and bp bound ops by using double queue alternating pop algorithm and - update fp and bp bound ops time at device by using torch_to_npu_links - """ - fp_que = deque(fine_data.fp_ops) - bp_que = deque(fine_data.bp_ops) - - # get fp and bp bound ops - _, res_bp_ops = self.double_queue_pop(fp_que, bp_que) - - # according to torch_to_npu_links, split fp and bp timeslice - bp_ops = self.update_ops_time(res_bp_ops, fine_data.torch_to_npu_links, fine_data.npu_ops_ts_dur) - return bp_ops - - def get_pipeline_timeslice(self, hcom_ops: list, hcom_tids: list, - min_ts: str, max_ts: str) -> List[PipelineTimeSlice]: - """ - get pipeline timeslice by using hcom ops - """ - timeslice_list = [] - last_op_end = None - if len(hcom_tids) > 1: - print("[WARN] More than one hcom tid found, default to show minimal tid pipeline view.") - - for op in hcom_ops: - if op[Constant.OP_TID] == min(hcom_tids): - # gap between two hcom ops - if last_op_end: - timeslice_list.append(PipelineTimeSlice(str(last_op_end), op[Constant.TS], self.STAGE)) - # hcom op - last_op_end = Decimal(op[Constant.TS]) + Decimal(op[Constant.DUR]) - timeslice_list.append(PipelineTimeSlice(op[Constant.TS], str(last_op_end), self.BUBBLE)) - - # add start STAGE and end STAGE - timeslice_list.insert(0, PipelineTimeSlice(min_ts, timeslice_list[0].start, self.STAGE)) - timeslice_list.insert(len(timeslice_list), PipelineTimeSlice(timeslice_list[-1].end, max_ts, self.STAGE)) - return timeslice_list - - def update_stage_fp_bp(self, timeslice_list: List[PipelineTimeSlice], - bp_ops: List[List[dict]]) -> None: - """ - update stage fp and bp time - """ - pipeline_que = deque(timeslice_list) - bp_bound_que = deque(bp_ops) - - 
while pipeline_que and bp_bound_que: - while pipeline_que[0].slice_type != self.STAGE: - pipeline_que.popleft() - if not pipeline_que: - return None - - bp_bound_data = bp_bound_que[0] - bp_bound_s = Decimal(bp_bound_data[0]['npu_op_ts']) - bp_bound_e = Decimal(bp_bound_data[1]['npu_op_ts']) + Decimal(bp_bound_data[1]['npu_op_dur']) - - pipeline_s = Decimal(pipeline_que[0].start) - pipeline_e = Decimal(pipeline_que[0].end) - - if pipeline_s <= bp_bound_s and bp_bound_e <= pipeline_e: - pipeline_que[0].bp_timeslice.append((str(bp_bound_s), str(bp_bound_e))) - bp_bound_que.popleft() - elif bp_bound_s > pipeline_e: - pipeline_que.popleft() - else: - bp_bound_que.popleft() diff --git a/profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py b/profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py deleted file mode 100644 index 6fa83c765f..0000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/kernel_cluster_advice.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import pandas as pd -from common_func.path_manager import PathManager -from common_func.constant import Constant -from common_func_advisor.constant import Constant as AdvisorConstant -from cluster_advice.cluster_advice_base import ClusterAdviceBase -from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor - - -class KernelClusterAdvice(ClusterAdviceBase): - COLUMNS_TO_GROUP = ["Name", "Input Shapes", "Input Data Types", "Output Shapes"] - COLUMNS_TO_CAL = ["Duration(us)"] - CAL_FUN = ['mean', 'var', 'max', 'min', 'count', 'sum'] - - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) - self.all_kernel_data = pd.DataFrame() - - def run(self): - self.load_kernel_details_data() - return self.calculate_data() - - def load_kernel_details_data(self): - prof_dirs = self.get_prof_dirs(self.collection_path) - if not prof_dirs: - msg = "[ERROR] There is no profile in this collection path, terminate 
analysis." - raise RuntimeError(msg) - - data_map = PytorchDataPreprocessor(prof_dirs).get_data_map() - self.all_kernel_data = pd.DataFrame() - for rank_id, profiling_dir_path in data_map.items(): - kernel_file = os.path.join(profiling_dir_path, Constant.SINGLE_OUTPUT, Constant.KERNEL_DETAILS_CSV) - if kernel_file: - # 判断csv文件大小 - PathManager.check_path_readable(kernel_file) - # 读取CSV文件 - df_temp = pd.read_csv(kernel_file) - columns_to_keep = self.COLUMNS_TO_GROUP + self.COLUMNS_TO_CAL - if [1 for element in columns_to_keep if element not in list(df_temp)]: - msg = "[ERROR] Kernel details.csv has wrong data columns, terminate analysis." - raise RuntimeError(msg) - df = df_temp[columns_to_keep] - df.insert(loc=0, column='rank id', value=rank_id) - # 将数据添加到最终的数据框中 - self.all_kernel_data = pd.concat([self.all_kernel_data, df], ignore_index=True) - - def calculate_data(self): - # 存储所有合并后的数据 - calculate_dict = {self.COLUMNS_TO_CAL[i]: self.CAL_FUN - for i in range(len(self.COLUMNS_TO_CAL))} - group_col = ["rank id"] + self.COLUMNS_TO_GROUP - view_data = self.all_kernel_data.groupby(group_col).agg(calculate_dict).reset_index() - view_data.columns = [''.join(col) if col[1] == "" else '_'.join(col) for col in view_data.columns] - return view_data - - def get_prof_dirs(self, collection_path): - prof_dirs = [] - for prof_dir in os.listdir(collection_path): - if prof_dir.endswith(AdvisorConstant.PT_PROF_SUFFIX): - prof_dirs.append(os.path.join(collection_path, prof_dir)) - - return prof_dirs \ No newline at end of file diff --git a/profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py b/profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py deleted file mode 100644 index f8a625242f..0000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/slow_link_advice.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from collections import defaultdict -from common_func_advisor.constant import Constant -from common_func.file_manager import FileManager -from cluster_advice.cluster_advice_base import ClusterAdviceBase - - -class SlowLinkAdvice(ClusterAdviceBase): - RDMA_TIME_MS = "RDMA time(ms)" - RDMA_SIZE_MB = "RDMA size(mb)" - SDMA_TIME_MS = "SDMA time(ms)" - SDMA_SIZE_MB = "SDMA size(mb)" - RDMA_BANDWIDTH = "RDMA bandwidth(GB/s)" - SDMA_BANDWIDTH = "SDMA bandwidth(GB/s)" - COMMUNICATION_BANDWIDTH_INFO = "Communication Bandwidth Info" - TRANSIT_TIME = "Transit Time(ms)" - TRANSIT_SIZE = "Transit Size(MB)" - SDMA = "SDMA" - RDMA = "RDMA" - - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) - self.rank_bw_dict = defaultdict(lambda: { - self.RDMA_TIME_MS: 0, - self.RDMA_SIZE_MB: 0, - self.SDMA_TIME_MS: 0, - self.SDMA_SIZE_MB: 0, - }) - - @staticmethod - def compute_ratio(dividend: float, divisor: float): - if abs(divisor) < 1e-15: - return 0 - else: - return round(dividend / divisor, 4) - - def load_communication_json(self): - json_path = os.path.join(self.collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT, Constant.CLUSTER_COMM_JSON) - if not os.path.exists(json_path): - msg = "[ERROR] cluster_communication.json doesn't exist, terminate analysis." 
- raise RuntimeError(msg) - communication_json = FileManager.read_json_file(json_path) - return communication_json - - def run(self): - self.path_check() - communication_json = self.load_communication_json() - self.process(communication_json) - self.output() - return self.output_format_data - - def process(self, communication_json: dict): - for comm_group, group_dict in communication_json.items(): - for step, step_dict in group_dict.items(): - for op, op_dict in step_dict.items(): - self.compute_bandwidth(op_dict) - if self.rank_bw_dict: - self.produce_bottleneck(self.RDMA_BANDWIDTH) - self.produce_bottleneck(self.SDMA_BANDWIDTH) - - def compute_bandwidth(self, op_dict: dict): - for rank_id, rank_dict in op_dict.items(): - try: - rank = int(rank_id) - except ValueError as e: - msg = "[ERROR] Cluster_communication.json has invalid structure." - raise ValueError(msg) from e - for comm_type, bw_dict in rank_dict.get(self.COMMUNICATION_BANDWIDTH_INFO, {}).items(): - if comm_type == self.SDMA: - self.rank_bw_dict[rank][self.SDMA_SIZE_MB] += bw_dict.get(self.TRANSIT_SIZE) - self.rank_bw_dict[rank][self.SDMA_TIME_MS] += bw_dict.get(self.TRANSIT_TIME) - if comm_type == self.RDMA: - self.rank_bw_dict[rank][self.RDMA_SIZE_MB] += bw_dict.get(self.TRANSIT_SIZE) - self.rank_bw_dict[rank][self.RDMA_TIME_MS] += bw_dict.get(self.TRANSIT_TIME) - - for rank, rank_dict in self.rank_bw_dict.items(): - self.rank_bw_dict[rank][self.RDMA_BANDWIDTH] = self.compute_ratio( - self.rank_bw_dict[rank][self.RDMA_SIZE_MB], self.rank_bw_dict[rank][self.RDMA_TIME_MS]) - self.rank_bw_dict[rank][self.SDMA_BANDWIDTH] = self.compute_ratio( - self.rank_bw_dict[rank][self.SDMA_SIZE_MB], self.rank_bw_dict[rank][self.SDMA_TIME_MS]) - - def produce_bottleneck(self, link_type: str): - data_list = [rank_dict.get(link_type, 0) for rank_id, rank_dict in self.rank_bw_dict.items()] - avg_bw = round(sum(data_list) / len(data_list), 3) - if avg_bw == 0: - return - self.bottelneck += f'{link_type}: \n' \ - f'The 
average is {avg_bw}, ' \ - f'while the maximum is {round(max(data_list), 3)}GB/s and ' \ - f'the minimum is {round(min(data_list), 3)}GB/s. ' \ - f'the difference is {round(max(data_list) - min(data_list), 3)}GB/s. \n' - - def output(self): - self.output_format_data[self.DATA] = self.rank_bw_dict - self.output_format_data[self.BOTTLENECK] = self.bottelneck diff --git a/profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py b/profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py deleted file mode 100644 index 4e789fb7fb..0000000000 --- a/profiler/advisor/advisor_backend/cluster_advice/slow_rank_advice.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from collections import defaultdict -from common_func_advisor.constant import Constant -from common_func.file_manager import FileManager -from cluster_advice.cluster_advice_base import ClusterAdviceBase -from prof_bean_advisor.cluster_step_trace_time_bean import ClusterStepTraceTimeBean - - -class SlowRankAdvice(ClusterAdviceBase): - RANK = "rank" - RATIO_THRESHOLD = 0.05 - BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] - - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) - - def load_step_time(self): - csv_path = os.path.join(self.collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT, Constant.CLUSTER_STEP_TIME_CSV) - if not os.path.exists(csv_path): - msg = "[ERROR] cluster_step_trace_time.csv doesn't exist, terminate analysis." - raise RuntimeError(msg) - step_time = FileManager.read_csv_file(csv_path, ClusterStepTraceTimeBean) - return step_time - - def run(self): - self.path_check() - step_data = self.load_step_time() - step_dict = self.process(step_data) - self.output(step_dict) - return self.output_format_data - - def process(self, step_data: list): - step_dict = defaultdict(lambda: [0, 0, 0, 0]) - for step_bean in step_data: - if step_bean.type == self.RANK: - step_dict[step_bean.index][0] += step_bean.compute - step_dict[step_bean.index][1] += step_bean.communication - step_dict[step_bean.index][2] += step_bean.free - total_time_list = [sum(data_tuple) for rank_id, data_tuple in step_dict.items()] - if total_time_list: - mean_total_time = sum(total_time_list) / len(total_time_list) - for i in range(len(self.BOTTLENECK_LIST)): - self.produce_bottleneck(step_dict, i, mean_total_time) - return step_dict - - def produce_bottleneck(self, step_dict: dict, produce_type: int, mean_total_time: float): - data_list = [data_tuple[produce_type] for rank_id, data_tuple in step_dict.items()] - max_ratio = self.compute_max_gap_ratio(data_list, mean_total_time) - if max_ratio > self.RATIO_THRESHOLD: - 
self.bottelneck += f'{self.BOTTLENECK_LIST[produce_type]} has some issues in the cluster, ' \ - f'because the max difference of {self.BOTTLENECK_LIST[produce_type]} time ' \ - f'has reached {round(max_ratio * mean_total_time / 1000, 3)}ms. \n' - - def output(self, step_dict: dict): - self.output_format_data[self.DATA] = step_dict - self.output_format_data[self.BOTTLENECK] = self.bottelneck diff --git a/profiler/advisor/advisor_backend/common_func_advisor/__init__.py b/profiler/advisor/advisor_backend/common_func_advisor/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/common_func_advisor/constant.py b/profiler/advisor/advisor_backend/common_func_advisor/constant.py deleted file mode 100644 index 46a7fb24c2..0000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/constant.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from enum import Enum - - -class CsvTitle: - MODEL_NAME = "Model Name" - MODEL_ID = "Model ID" - TASK_ID = "Task ID" - STREAM_ID = "Stream ID" - INFER_ID = "Infer ID" - TASK_START_TIME = "Task Start Time(us)" - TASK_WAIT_TIME = "Task Wait Time(us)" - BLOCK_DIM = "Block Dim" - MIX_BLOCK_DIM = "Mix Block Dim" - HF32_ELIGIBLE = "HF32 Eligible" - INPUT_SHAPES = "Input Shapes" - INPUT_DATA_TYPES = "Input Data Types" - INPUT_FORMATS = "Input Formats" - OUTPUT_SHAPES = "Output Shapes" - OUTPUT_DATA_TYPES = "Output Data Types" - OUTPUT_FORMATS = "Output Formats" - CONTEXT_ID = "Context ID" - AICORE_TIME = "aicore_time(us)" - AIC_TOTAL_CYCLES = "aic_total_cycles" - AIC_MAC_TIME = "aic_mac_time(us)" - AIC_MAC_RATIO = "aic_mac_ratio" - AIC_SCALAR_TIME = "aic_scalar_time(us)" - AIC_SCALAR_RATIO = "aic_scalar_ratio" - AIC_MTE1_TIME = "aic_mte1_time(us)" - AIC_MTE1_RATIO = "aic_mte1_ratio" - AIC_MTE2_TIME = "aic_mte2_time(us)" - AIC_MTE2_RATIO = "aic_mte2_ratio" - AIC_FIXPIPE_TIME = "aic_fixpipe_time(us)" - AIC_FIXPIPE_RATIO = "aic_fixpipe_ratio" - AIC_ICACHE_MISS_RATE = "aic_icache_miss_rate" - AIV_TIME = "aiv_time(us)" - AIV_TOTAL_CYCLES = "aiv_total_cycles" - AIV_VEC_TIME = "aiv_vec_time(us)" - AIV_VEC_RATIO = "aiv_vec_ratio" - AIV_SCALAR_TIME = "aiv_scalar_time(us)" - AIV_SCALAR_RATIO = "aiv_scalar_ratio" - AIV_MTE2_TIME = "aiv_mte2_time(us)" - AIV_MTE2_RATIO = "aiv_mte2_ratio" - AIV_MTE3_TIME = "aiv_mte3_time(us)" - AIV_MTE3_RATIO = "aiv_mte3_ratio" - AIV_ICACHE_MISS_RATE = "aiv_icache_miss_rate" - CUBE_UTILIZATION = "cube_utilization( %)" - TASK_DURATION_SUM = 
"Task Duration Sum(us)" - TASK_DURATION_MEAN = "Task Duration Mean(us)" - TASK_DURATION_STD = "Task Duration Std(us)" - TASK_DURATION_RATIO = "Task Duration Ratio(100%)" - SIZE = "size(MB)" - THROUGHPUT = "throughput(GB/s)" - COLOR = "color" - GAP = "Gap(us)" - DURATION_SUM = "Duration Sum(us)" - COUNT = "Count" - MAX_DURATION = "Max Duration(us)" - MIN_DURATION = "Min Duration(us)" - AVG_DURATION = "Avg Duration(us)" - DURATION_RATIO = "Duration Ratio" - INDEX = "Index" - - -# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配旧版csv -class CsvTitleV1(CsvTitle): - OP_NAME = "Op Name" - OP_TYPE = "OP Type" - TASK_TYPE = "Task Type" - TASK_DURATION = "Task Duration(us)" - - -# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配新版csv -class CsvTitleV2(CsvTitle): - OP_NAME = "Name" - OP_TYPE = "Type" - TASK_TYPE = "Accelerator Core" - TASK_DURATION = "Duration(us)" - - -class Constant: - DTYPE_SIZE_MAP = {"int8": 1, "uint8": 1, - "int16": 2, "uint16": 2, - "int32": 4, "uint32": 4, - "int64": 8, "uint64": 8, - "float16": 2, - "bfloat16": 2, - "bf16": 2, - "dt_bf16": 2, - "float32": 4, - "float": 4, - "float64": 8, - "complex64": 8, - "complex128": 16, - "bool": 1} - TP_THRESHOLD = 1150 - MAX_INPUT_MODE_LEN = 30 - MAX_INPUT_ADVICE_LEN = 30 - SMALL_OP_DUR_RATIO = 0.2 - SMALL_OP_NUM_RATIO = 0.2 - BYTE_UNIT_TRANS = 1024 - UNIT_TRANS = 1000 - - # mode list - COMPUTE = "compute" - TIMELINE = "timeline" - CLUSTER = "cluster" - OVERALL = "overall" - PIPELINE = "pipeline" - - # advice list - SLOW_RANK = "slow rank" - SLOW_LINK = "slow link" - KERNEL = "kernel" - - # compute - NPU_FUSED = "npu_fused" - NPU_SLOW = "npu_slow" - - # timeline - OPTIM = "optimizer" - OP_SCHE = "op_schedule" - - # overall - SUMMARY = "summary" - - PT_PROF_SUFFIX = "ascend_pt" - ASCEND_PROFILER_OUTPUT = "ASCEND_PROFILER_OUTPUT" - COLLECTION_PATH = "collection_path" - CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" - KERNEL_DETAILS_CSV = "kernel_details.csv" - CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" - 
CLUSTER_COMM_JSON = "cluster_communication.json" - - # pipline - OP_NAME = "name" - OP_TID = "tid" - PID = "pid" - TS = "ts" - DUR = "dur" - CAT = "cat" - ARGS = "args" - PH = "ph" - ID = "id" - PH_START = "s" - PH_BEGIN = "B" - PH_END = "E" - PH_META = "M" - PH_X = "X" - CNAME = "cname" - PROCESS_NAME = "process_name" - FRAMEWORK_NAME = "Python" - ASCEND_HARDWARE_NAME = "Ascend Hardware" - ASYNC_NPU = "async_npu" - STEP_PREFIX = "ProfilerStep#" - FP_ATEN_OP = "aten" - FP_C10D_OP = "c10d" - HCOM_OP_PREFIX = "hcom_" - BP_AUTOGRAD_OP = "autograd" - TRACE_VIEW_JSON = "trace_view.json" - - # pattern_dict key: pattern, value: pattern name - PATTERN_DICT = {("Add", "DropOutDoMask", "Add"): "bias_dropout_add", - ("BatchMatMul", "Mul", "Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast", "DropOutDoMask", - "AsStrided", "BatchMatMul", "Transpose"): "FA", - ("Transpose", "Transpose", "Transpose", "Mul", "Transpose", "BatchMatMulV2", "MaskedFill", - "Cast", "SoftmaxV2", "Cast", "DropOutDoMask", "BatchMatMulV2", "Transpose"): "FA", - ("Transpose", "BatchMatMulV2", "Transpose", "Transpose", "BatchMatMulV2", "ZerosLike", - "DropOutDoMask", "Cast", "SoftmaxGrad", "Cast", "MaskedFill", "BatchMatMulV2", - "BatchMatMulV2", "Mul"): "FA", - ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Cast", "Cast", "Mul", "Cast", "Cast", - "Mul", "Cast"): "RMSNORM", - ("Cast", "LayerNorm", "Cast"): "LayerNorm", - ("Add", "LayerNorm"): "AddLayerNorm", - ("Add", "LayerNormV3"): "AddLayerNorm", - ("Gelu", "Add"): "GeluAdd", - ("Cast", "Square", "MemSet", "ReduceMean", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "RMSNorm", - ("BatchMatMul", "RealDiv", "Add", "Maximum", "SoftmaxV2", "Cast", "BatchMatMul"): "FA", - ("BatchMatMulV2", "RealDiv", "Add", "Cast", "Maximum", "Cast", "SoftmaxV2", "AsStrided", - "BatchMatMulV2"): "FA", - ("BatchMatMulV2", "RealDiv", "Add", "Cast", "SoftmaxV2", "Cast", "BroadcastTo", - "BatchMatMulV2"): "FA", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Cast", "Mul", "Add"): 
"RotaryMul", - ("Mul", "AsStrided", "Neg", "AsStrided", "ConcatD", "Mul", "Add"): "RotaryMul", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul", "Add"): "RotaryMul", - ("MatMulV2", "Swish", "MatMulV2", "Mul", "MatMulV2"): "FFN", - ("Transpose", "Transpose", "GatherElement", "Transpose"): "GatherElement", - ("Slice", "Slice", "Swish", "Mul"): "torch_npu.npu_swiglu", - ("Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast"): "torch_npu.npu_scaled_masked_softmax", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul"): "torch_npu.npu_rotary_mul", - ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "torch_npu.npu_rms_norm"} - TITLE = CsvTitleV2 - - @classmethod - def update_title(cls): - cls.TITLE = CsvTitleV1 - - -class CoreType: - AIV = "AI_VECTOR_CORE" - AIC = "AI_CORE" - AICPU = "AI_CPU" - MIX_AIV = "MIX_AIV" - MIX_AIC = "MIX_AIC" - HCCL = "HCCL" - - -class PerfColor(Enum): - WHITE = 0 - GREEN = 1 - YELLOW = 2 - RED = 3 diff --git a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py b/profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py deleted file mode 100644 index 8171f06ee2..0000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_json.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from abc import abstractmethod -from dataclasses import dataclass -from dataclasses import field -from typing import Dict -from typing import List - -import pandas as pd - -from common_func.file_manager import FileManager - - -@dataclass -class TraceObj: - ph: str = "" - bp: str = "" - cat: str = "" - name: str = "" - pid: int = 0 - tid: int = 0 - id: int = 0 - ts: str = "" - dur: float = 0.0 - args: dict = field(default='unknown') - - @abstractmethod - def hash(self): - raise Exception("To be implemented") - - def valid(self): - return self.name != "" - - def check_hashable(self): - if not self.valid(): - raise Exception("Illegal {} to hash".format(self.__class__.name)) - - -@dataclass -class Process(TraceObj): - def hash(self): - self.check_hashable() - # msprof 保证name唯一性 - return self.args.get("name") - - -@dataclass -class Thread(TraceObj): - def hash(self): - self.check_hashable() - # msprof 保证name唯一性 - return self.args.get("name") - - -@dataclass -class DurationEvent(TraceObj): - def hash(self): - self.check_hashable() - return self.ts - - -@dataclass -class FlowEvent(TraceObj): - s_point_ts: str = "" - e_point_ts: str = "" - - def hash(self): - self.check_hashable() - return self.e_point_ts - - -class TraceViewJson: - - def __init__(self, path): - self.processes: Dict[str, Process] = dict() - self.threads: Dict[str, Thread] = dict() - self.python_dur_events: Dict[str, DurationEvent] = dict() - self.cann_dur_events: Dict[str, DurationEvent] = dict() - self.ascend_hardware_dur_events: Dict[str, DurationEvent] = dict() - self.torch_2_npu_flow_events: Dict[str, FlowEvent] = dict() - traces = FileManager.read_json_file(path) - self._load_obj(traces) - - def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: - if ts_col not in data.columns.tolist(): - print("[ERROR] No {} col found in data columns.".format(ts_col)) - return "" - row = data.loc[index_id] - timestamp = row[ts_col] - flow_event = 
self.get_torch_2_npu_flow_event(timestamp) - if not flow_event.valid(): - print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) - return "" - flow_event_s_key = flow_event.s_point_ts - python_dur_events = self.get_python_dur_events_contain_ts(flow_event_s_key) - if not python_dur_events: - print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) - return "" - # 保持新老版本callstack兼容性 - if python_dur_events[0].args.get("Call stack"): - # 旧版本 - call_stack_list = python_dur_events[0].args.get("Call stack").split(";") - else: - python_dur_events.sort(key=lambda e: e.ts) - # 新版本 - call_stack_list = [event.name for event in python_dur_events if event.cat == "python_function"] - call_stack = "\n".join(call_stack_list) - return call_stack - - def get_torch_2_npu_flow_event(self, end_time) -> FlowEvent: - if not self.torch_2_npu_flow_events or not self.torch_2_npu_flow_events.get(end_time): - print("[ERROR] Find flow event failed for ts: {}".format(end_time)) - return FlowEvent() - return self.torch_2_npu_flow_events.get(end_time) - - def get_python_dur_events_contain_ts(self, ts) -> List[DurationEvent]: - res = [] - for event in self.python_dur_events.values(): - if float(event.ts) <= float(ts) <= float(event.ts) + event.dur: - res.append(event) - return res - - def _load_obj(self, traces): - self._load_format(traces) - if not self._check_format(): - print("[ERROR] parse json failed for error format") - return - self._load_duration_events(traces) - self._load_torch_to_npu_flow_events(traces) - - def _check_format(self): - # 当前功能只需要这两个process,可扩展 - check_processes = ['Python', 'Ascend Hardware'] - for check_process in check_processes: - if check_process in self.processes: - continue - print("[ERROR] {} process not found in json.".format(check_process)) - return False - return True - - # 加载pid, tid头 - def _load_format(self, traces: List[Dict]): - for i, trace in enumerate(traces): - if trace.get('name') == 'process_name': - if not 
trace.get('args') or not trace.get('args').get('name') or not trace.get('pid'): - continue - process = Process(**trace) - self.processes[process.hash()] = process - if trace.get('name') == 'thread_name': - if not trace.get('args') or not trace.get('args').get('name') or not trace.get('tid'): - continue - thread = Thread(**trace) - self.threads[thread.hash()] = thread - - def _load_duration_events(self, traces: List[Dict]): - def check_events(_trace): - return _trace.get('name') and _trace.get("ts") and _trace.get("dur") - - python_pid = self.processes.get("Python").pid - cann_pid = self.processes.get("CANN").pid - ascend_hardware_pid = self.processes.get("Ascend Hardware").pid - for i, trace in enumerate(traces): - if trace.get('ph') != 'X': - continue - if not check_events(trace): - continue - event = DurationEvent(**trace) - if trace.get('pid') == python_pid: - self.python_dur_events[event.hash()] = event - elif trace.get('pid') == cann_pid: - self.cann_dur_events[event.hash()] = event - elif trace.get("pid") == ascend_hardware_pid: - self.ascend_hardware_dur_events[event.hash()] = event - - def _load_torch_to_npu_flow_events(self, traces: List[Dict]): - def check_events(_trace): - return _trace.get('name') and _trace.get("id") and _trace.get("ts") - - flow_events_table_by_id = dict() - - python_pid = self.processes.get("Python") - for i, trace in enumerate(traces): - if trace.get('ph') != 's' and trace.get('ph') != 'f' and trace.get('pid') != python_pid: - continue - if not check_events(trace): - continue - event = flow_events_table_by_id.get(trace.get("id")) - if not event: - event = FlowEvent(**trace) - if trace.get('ph') == 's': - event.s_point_ts = trace.get('ts') - else: - event.e_point_ts = trace.get('ts') - flow_events_table_by_id[event.id] = event - - self.torch_2_npu_flow_events = {eve.hash(): eve for eve in flow_events_table_by_id.values()} diff --git a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py 
b/profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py deleted file mode 100644 index 7b9baa32d9..0000000000 --- a/profiler/advisor/advisor_backend/common_func_advisor/trace_view_preprocessor.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import sys -from typing import Optional -from dataclasses import dataclass - -from common_func_advisor.constant import Constant - - -@dataclass -class FineTraceViewData: - py_pid: int = -1 - fp_tid: int = -1 - bp_tid: int = -1 - ascend_pid: int = -1 - min_ts: str = str(sys.maxsize) - max_ts: str = "0" - hcom_tids: list = None - fp_ops: list = None - bp_ops: list = None - hcom_ops: list = None - npu_ops_ts_dur: dict = None - torch_to_npu_links: list = None - - def __post_init__(self): - self.hcom_tids = self.hcom_tids or [] - self.fp_ops = self.fp_ops or [] - self.bp_ops = self.bp_ops or [] - self.hcom_ops = self.hcom_ops or [] - self.npu_ops_ts_dur = self.npu_ops_ts_dur or {} - self.torch_to_npu_links = self.torch_to_npu_links or [] - - def sort(self): - self.fp_ops.sort(key=lambda x: x[Constant.TS]) - self.bp_ops.sort(key=lambda x: x[Constant.TS]) - self.hcom_ops.sort(key=lambda x: x[Constant.TS]) - self.torch_to_npu_links.sort(key=lambda x: x[Constant.TS]) - - -class TraceViewPreProcessor: - """ - Trace view data preprocess - """ - - @staticmethod - def _is_fp_op(op_name: str) -> 
bool: - """ - check whether op is fp op - """ - return op_name.startswith(Constant.FP_ATEN_OP) or op_name.startswith(Constant.FP_C10D_OP) - - @staticmethod - def _is_fp_data(data: dict, fp_tid: int, py_pid: int) -> bool: - """ - check whether data is valid fp data - """ - return data[Constant.OP_TID] == fp_tid and \ - Constant.TS in data and Constant.DUR in data and \ - not data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) and \ - data[Constant.PID] == py_pid - - @staticmethod - def _is_bp_op(op_name: str) -> bool: - """ - check whether op is bp op - """ - return op_name.startswith(Constant.BP_AUTOGRAD_OP) - - @staticmethod - def _is_bp_data(data: dict, bp_tid: int, py_pid: int) -> bool: - """ - check whether data is valid bp data - """ - return data[Constant.OP_TID] == bp_tid and \ - Constant.TS in data and Constant.DUR in data and \ - data[Constant.PID] == py_pid - - @staticmethod - def _is_torch_to_npu_link(data: dict, fp_tid: int) -> bool: - """ - check whether data is torch to npu link - """ - return Constant.CAT in data and data[Constant.CAT] == Constant.ASYNC_NPU and \ - data[Constant.PH] == Constant.PH_START and \ - data[Constant.PID] == fp_tid - - @staticmethod - def _is_send_recv_op(op_name: str) -> bool: - """ - check whether op is hcom send or recv op - """ - # eg: hcom_BatchSendRecv__101_0_1 - p1 = re.compile(r'hcom_\w+SendRecv__\d+') - # eg: hcom_send__101_0_1 - p2 = re.compile(r'hcom_send__\d+') - # eg: hcom_receive__101_0_1 - p3 = re.compile(r'hcom_receive__\d+') - return bool(p1.match(op_name)) or bool(p2.match(op_name)) or bool(p3.match(op_name)) - - @staticmethod - def _is_hcom_op(op_name: str) -> bool: - """ - check whether data is hcom data - """ - return op_name.startswith(Constant.HCOM_OP_PREFIX) - - @staticmethod - def _is_python_process(data: dict) -> bool: - """ - check whether data is python process - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ - data[Constant.OP_NAME] == Constant.PROCESS_NAME 
and \ - data[Constant.ARGS][Constant.OP_NAME] == Constant.FRAMEWORK_NAME - - @staticmethod - def _is_step_op(data: dict) -> bool: - """ - check whether data is step data - """ - return data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) - - @staticmethod - def _is_ascend_process(data: dict) -> bool: - """ - check whether data is ascend process data - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ - data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ - data[Constant.ARGS][Constant.OP_NAME] == Constant.ASCEND_HARDWARE_NAME - - @staticmethod - def _is_npu_op(data: dict, ascend_pid: int) -> bool: - """ - check whether data is npu op - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_X and \ - not data[Constant.OP_NAME].isupper() and \ - data[Constant.PID] == ascend_pid - - def process(self, raw_data: list) -> Optional[FineTraceViewData]: - """ - preprocess raw data - """ - if not raw_data: - print("[ERROR] No raw data found in trace view data.") - return None - - raw_fp_tids, raw_bp_tids, raw_hcom_tids = set(), set(), set() - fine_data = FineTraceViewData() - - # counting fp ops and bp ops tid and ascend pid - for data in raw_data: - if self._is_fp_op(data[Constant.OP_NAME]): - raw_fp_tids.add(data[Constant.OP_TID]) - elif self._is_bp_op(data[Constant.OP_NAME]): - raw_bp_tids.add(data[Constant.OP_TID]) - elif self._is_send_recv_op(data[Constant.OP_NAME]): - fine_data.hcom_ops.append(data) - raw_hcom_tids.add(data[Constant.OP_TID]) - elif self._is_python_process(data): - fine_data.py_pid = data[Constant.PID] - elif self._is_ascend_process(data): - fine_data.ascend_pid = data[Constant.PID] - - # find max and min ts in hcom ops - if self._is_hcom_op(data[Constant.OP_NAME]): - # for compatibility with old data (ts is float type) - ts = data[Constant.TS] if not isinstance(data[Constant.TS], float) else str(data[Constant.TS]) - fine_data.min_ts = min(fine_data.min_ts, ts) - fine_data.max_ts = max(fine_data.max_ts, 
ts) - - unique_fp_tid = list(raw_fp_tids - raw_bp_tids) - unique_bp_tid = list(raw_bp_tids) - fine_data.hcom_tids = list(raw_hcom_tids) - - if not unique_fp_tid or not unique_bp_tid: - print("[INFO] No fp or bp tid found in trace view data.") - else: - fine_data.fp_tid, fine_data.bp_tid = unique_fp_tid[0], unique_bp_tid[0] - - # filter fp ops and bp ops and torch_to_npu_links - for data in raw_data: - if self._is_fp_data(data, fine_data.fp_tid, fine_data.py_pid): - fine_data.fp_ops.append(data) - elif self._is_bp_data(data, fine_data.bp_tid, fine_data.py_pid): - fine_data.bp_ops.append(data) - elif self._is_torch_to_npu_link(data, fine_data.fp_tid): - fine_data.torch_to_npu_links.append(data) - elif self._is_npu_op(data, fine_data.ascend_pid): - fine_data.npu_ops_ts_dur[data[Constant.TS]] = data[Constant.DUR] - - fine_data.sort() - return fine_data diff --git a/profiler/advisor/advisor_backend/compute_advice/__init__.py b/profiler/advisor/advisor_backend/compute_advice/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py b/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py deleted file mode 100644 index cafbafd8e2..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/compute_advice_base.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import abstractmethod -from collections import defaultdict -import os - -from advice_base import AdviceBase -from common_func.file_manager import FileManager - - -class ComputeAdviceBase(AdviceBase): - ASCEND_PT = 'ascend_pt' - ASCEND_PROFILER_OUTPUT = 'ASCEND_PROFILER_OUTPUT' - KERNEL_DETAIL_FILE = "kernel_details.csv" - TRACE_VIEW_FILE = "trace_view.json" - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.kernel_details_path = "" - self.has_preparse = False - self.preparse_data = defaultdict(list) - self.call_stack = None - self.trace_view_path = "" - - def path_check(self): - """ - check whether input path is valid - """ - if not os.path.exists(self.collection_path): - print("[ERROR] Path: {} is not exist.".format(self.collection_path)) - return False - if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"): - self.kernel_details_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", - "kernel_details.csv") - if not 
os.path.exists(self.kernel_details_path): - print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format( - os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT"))) - return False - elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path) == "kernel_details.csv": - self.kernel_details_path = self.collection_path - else: - print("[ERROR] Please input ascend_pt or kernel_details.csv") - return False - print("[INFO] Start to analyse the target file: {}".format(self.kernel_details_path)) - self.preparse() - return True - - def has_callstack(self): - if self.call_stack is not None: - return self.call_stack - profiler_info_json_path = "" - for file in os.listdir(self.collection_path): - if file.startswith("profiler_info"): - profiler_info_json_path = os.path.join(self.collection_path, file) - break - if not profiler_info_json_path: - self.call_stack = False - return self.call_stack - self.trace_view_path = os.path.join(self.collection_path, self.ASCEND_PROFILER_OUTPUT, "trace_view.json") - if not os.path.exists(profiler_info_json_path) or not os.path.exists(self.trace_view_path): - self.call_stack = False - return self.call_stack - info = FileManager.read_json_file(profiler_info_json_path) - if not info.get("config") or not info.get("config").get("common_config") \ - or not info.get("config").get("common_config").get("with_stack"): - self.call_stack = False - return self.call_stack - activities = info.get("config").get("common_config").get("activities") - if not activities or "ProfilerActivity.CPU" not in activities: - self.call_stack = False - return self.call_stack - self.call_stack = info.get("config").get("common_config").get("with_stack") - return self.call_stack - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = 
self.cur_bottleneck - self.output_format_data[self.ADVICE] = self.cur_advice - - def preparse(self): - if self.has_preparse: - return diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py deleted file mode 100644 index c85c14d618..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/csv_analyzer.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import multiprocessing - -import pandas as pd -import numpy as np - -from common_func_advisor.constant import Constant -from .op_perf import OpPerfFactory - - -class CSVAnalyzer: - def __init__(self, path) -> None: - self._path = path - - def process(self): - df = pd.read_csv(self._path, dtype={"Start Time(us)": str}) - # 分析是否存在可融合的算子 - op_type_list = df["Type"].tolist() - duration_list = df["Duration(us)"].tolist() - start_times = df["Start Time(us)"].tolist() - # 去除末尾的\t分隔符 - start_times = [start_time[:-1] for start_time in start_times] - result_list = [] - for pattern in Constant.PATTERN_DICT.keys(): - result_list.extend(self.find_all_sub_lists(op_type_list, duration_list, start_times, pattern)) - data_frame = pd.DataFrame(result_list) - data_frame.columns = ["pattern_name", "pattern", "len", "count", "duration sum(us)", "op durations(us)", - "index", "first_timestamp"] - return data_frame - - @staticmethod - def find_all_sub_lists(op_type_list, duration_list, start_times, expect_sub_list): - # 创建一个空字典,用来存储子列表和它们的出现次数和起始位置 - len_sub_list = len(expect_sub_list) - expect_sub_list = tuple(expect_sub_list) - sublist_dict = {} - # 遍历列表,从每个位置开始,取长度为N的子列表 - for i in range(len(op_type_list) - len_sub_list + 1): - sublist = tuple(op_type_list[i:i + len_sub_list]) - if sublist != expect_sub_list: - continue - # 如果子列表已经在字典中,就增加它的出现次数,否则就初始化为1 - if sublist in sublist_dict: - # count - sublist_dict[sublist][0] += 1 - # index - sublist_dict[sublist][1].append(i) - # total duration - sublist_dict[sublist][2] += sum(duration_list[i:i + len_sub_list]) - # duration - zip_data = zip(sublist_dict[sublist][3], duration_list[i:i + len_sub_list]) - sublist_dict[sublist][3] = [a + b for a, b in zip_data] - else: - sublist_dict[sublist] = [1, [i], sum(duration_list[i:i + len_sub_list]), - duration_list[i:i + len_sub_list], len_sub_list, start_times[i]] - # 创建一个空列表,用来存储所有重复的子列表 - 
repeated_sublists = [] - for sublist, (count, index, duration_sum, op_durations, sublist_len, first_time) in sublist_dict.items(): - pattern_name = Constant.PATTERN_DICT.get(sublist, "unknown") - op_durations = [round(num, 2) for num in op_durations] - repeated_sublists.append([pattern_name, sublist, sublist_len, count, - duration_sum, op_durations, index, first_time]) - if len(sublist_dict) == 0: - pattern_name = Constant.PATTERN_DICT.get(expect_sub_list, "unknown") - repeated_sublists.append([pattern_name, expect_sub_list, 0, 0, 0, 0, 0, 0]) - # 返回所有重复的子列表 - return repeated_sublists diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py deleted file mode 100644 index fd2a72ffa3..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/json_analyzer.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pandas as pd - -from common_func_advisor.trace_view_json import TraceViewJson - - -class JSONAnalyzer(object): - def __init__(self, path): - self._path = path - - def get_custom_code(self, data: pd.DataFrame, ts_col: str, output_col: str): - trace_json = TraceViewJson(self._path) - callstacks = pd.DataFrame(columns=[output_col]) - - for i, row in data.iterrows(): - if ts_col not in data.columns.tolist(): - print("[ERROR] No {} col found in data columns.".format(ts_col)) - return callstacks - timestamp = row[ts_col] - flow_event = trace_json.get_torch_2_npu_flow_event(timestamp) - if not flow_event.valid(): - print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) - callstacks.loc[i] = "" - continue - flow_event_s_key = flow_event.s_point_ts - python_dur_events = trace_json.get_python_dur_events_contain_ts(flow_event_s_key) - if not python_dur_events: - print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) - callstacks.loc[i] = "" - continue - # 保持新老版本callstack兼容性 - if python_dur_events[0].args.get("Call stack"): - # 旧版本 - callstack = python_dur_events[0].args.get("Call stack").split(";") - else: - python_dur_events.sort(key=lambda e: e.ts) - # 新版本 - callstack = [event.name for event in python_dur_events if event.cat == "python_function"] - callstack_str = "\n".join(callstack) - callstacks.loc[i] = callstack_str - return callstacks diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py deleted file mode 100644 index 7bcbed5a75..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -from typing import Dict - -from common_func_advisor.constant import Constant -from common_func_advisor.constant import CoreType -from common_func_advisor.constant import PerfColor - - -class OpPerfFactory: - @classmethod - def build(cls, op_row: Dict): - if op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIV: - return VecOpPerf(op_row) - elif op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIC: - return CubeOpPerf(op_row) - else: - return OpPerf(op_row) - - -class OpPerf: - def __init__(self, op_row: Dict): - if "OP Type" in op_row.keys(): - Constant.update_title() - self.row = op_row - self.model_name = op_row.get("Model Name") - self.model_id = op_row.get("Model ID") - self.task_id = op_row.get("Task ID") - self.stream_id = op_row.get("Stream ID") - self.infer_id = op_row.get("Infer ID") - self.op_name = op_row.get("Name") - self.op_type = op_row.get("Type") - self.task_type = op_row.get("Accelerator Core") - self.task_start_time = op_row.get("Start Time(us)") - self.task_duration = op_row.get("Duration(us)") - self.task_wait_time = op_row.get("Wait Time(us)") - self.block_dim = op_row.get("Block Dim") - self.mix_block_dim = op_row.get("Mix Block Dim") - - self.hf32_eligible = op_row.get("HF32 Eligible") - self.input_shapes = op_row.get("Input Shapes") - self.input_data_types = op_row.get("Input Data Types") - self.input_formats = op_row.get("Input Formats") - self.output_shapes = op_row.get("Output Shapes") - self.output_data_types = op_row.get("Output Data Types") - self.output_formats = op_row.get("Output Formats") - 
self.context_id = op_row.get("Context ID") - self.aicore_time = op_row.get("aicore_time(us)") - self.aic_total_cycles = op_row.get("aic_total_cycles") - - self.aic_mac_time = op_row.get("aic_mac_time(us)") - self.aic_mac_ratio = op_row.get("aic_mac_ratio") - self.aic_scalar_time = op_row.get("aic_scalar_time(us)") - self.aic_scalar_ratio = op_row.get("aic_scalar_ratio") - self.aic_mte1_time = op_row.get("aic_mte1_time(us)") - self.aic_mte1_ratio = op_row.get("aic_mte1_ratio") - self.aic_mte2_time = op_row.get("aic_mte2_time(us)") - self.aic_mte2_ratio = op_row.get("aic_mte2_ratio") - self.aic_fixpipe_time = op_row.get("aic_fixpipe_time(us)") - self.aic_fixpipe_ratio = op_row.get("aic_fixpipe_ratio") - self.aic_icache_miss_rate = op_row.get("aic_icache_miss_rate") - self.aiv_time = op_row.get("aiv_time(us)") - self.aiv_total_cycles = op_row.get("aiv_total_cycles") - self.aiv_vec_time = op_row.get("aiv_vec_time(us)") - self.aiv_vec_ratio = op_row.get("aiv_vec_ratio") - self.aiv_scalar_time = op_row.get("aiv_scalar_time(us)") - self.aiv_scalar_ratio = op_row.get("aiv_scalar_ratio") - self.aiv_mte2_time = op_row.get("aiv_mte2_time(us)") - - self.aiv_mte2_ratio = op_row.get("aiv_mte2_ratio") - self.aiv_mte3_time = op_row.get("aiv_mte3_time(us)") - self.aiv_mte3_ratio = op_row.get("aiv_mte3_ratio") - self.aiv_icache_miss_rate = op_row.get("aiv_icache_miss_rate") - self.cube_utilization = op_row.get("cube_utilization( %)") - - @staticmethod - def get_dtype_size(dtype_str: str): - return Constant.DTYPE_SIZE_MAP.get(dtype_str.lower(), 0) - - @staticmethod - def get_element_count(shape: list): - return functools.reduce(lambda x, y: int(x) * int(y), shape) - - @staticmethod - def shape_to_tuple(shape_str: str) -> tuple: - if not isinstance(shape_str, str): - return [] - shape_str = shape_str.strip('"') - split_shape = shape_str.strip(';') - if not split_shape: - return [] - pairs = split_shape.split(';') - shape_result = [] - for pair in pairs: - pair = pair.strip(";") - 
elements = pair.split(',') - elements = tuple(int(element) if "" != element else 0 for element in elements) - shape_result.append(elements) - return tuple(shape_result) - - @staticmethod - def dtype_to_tuple(dtypes_str: str) -> tuple: - if not isinstance(dtypes_str, str): - return [] - dtypes_str = dtypes_str.strip('"') - split_dtypes = dtypes_str.strip(';') - if not split_dtypes: - return [] - pairs = split_dtypes.split(';') - return tuple(pairs) - - def get_mac_ratio(self): - return self.aic_mac_ratio - - def get_size(self, shapes_str, dtypes_str): - shapes = self.shape_to_tuple(shapes_str) - dtypes = self.dtype_to_tuple(dtypes_str) - if len(shapes) > len(dtypes): - print(f"[ERROR] The size of shape is greater than that of dtypes.") - return 0 - if len(shapes) < len(dtypes): - shapes = list(shapes) - shapes.extend([(1,)] * (len(dtypes) - len(shapes))) - all_size = 0 - for index, shape in enumerate(shapes): - element_count = self.get_element_count(shape) - dtype_size = self.get_dtype_size(dtypes[index]) - all_size += element_count * dtype_size - return all_size - - def get_calc_size(self): - # input and output bytes (MB) - if not self.input_shapes or not self.output_shapes: - print("[ERROR] There is no tensor data, do not assess vector op performance.") - return 0 - intput_size = self.get_size(self.input_shapes, self.input_data_types) - output_size = self.get_size(self.output_shapes, self.output_data_types) - return (intput_size + output_size) / (Constant.BYTE_UNIT_TRANS * Constant.BYTE_UNIT_TRANS) - - def get_throughput(self): - # throughput(GB/s) - if not self.task_duration or abs(self.task_duration) < 1e-6: - print("[ERROR] There is no task_duration, do not assess vector op performance.") - return 0 - return self.row[Constant.TITLE.SIZE] / Constant.BYTE_UNIT_TRANS / self.task_duration * Constant.UNIT_TRANS * Constant.UNIT_TRANS - - def get_perf_color(self): - return PerfColor.WHITE - - def update(self): - self.row[Constant.TITLE.SIZE] = self.get_calc_size() - 
self.row[Constant.TITLE.THROUGHPUT] = self.get_throughput() - self.row[Constant.TITLE.COLOR] = self.get_perf_color().name - return self.row - - -class VecOpPerf(OpPerf): - def get_perf_color(self) -> PerfColor: - throughput = self.row[Constant.TITLE.THROUGHPUT] - op_duration = self.task_duration - tp_threshold = Constant.TP_THRESHOLD - if throughput == 0: - return PerfColor.WHITE - if throughput < tp_threshold / 2 and op_duration > 20: - return PerfColor.RED - elif tp_threshold / 2 <= throughput < tp_threshold: - return PerfColor.YELLOW - else: - return PerfColor.GREEN - - -class CubeOpPerf(OpPerf): - def get_perf_color(self) -> PerfColor: - aic_mac_ratio = self.get_mac_ratio() - if not aic_mac_ratio: - print("[WARNING] There is no aic_mac_ratio, do not assess cube op performance.") - return PerfColor.WHITE - elif aic_mac_ratio < 0.6: - return PerfColor.RED - elif 0.6 <= aic_mac_ratio < 0.8: - return PerfColor.YELLOW - else: - return PerfColor.GREEN diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py deleted file mode 100644 index fd5610bbbb..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_fused_advice.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -from abc import ABC - -import pandas as pd - -from compute_advice.compute_advice_base import ComputeAdviceBase -from compute_advice.npu_fused.csv_analyzer import CSVAnalyzer -from compute_advice.npu_fused.json_analyzer import JSONAnalyzer - - -class NpuFusedAdvice(ComputeAdviceBase, ABC): - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.cur_data = dict() - self.cur_bottleneck = str() - self.cur_advice = str() - self.kernel_details_path = "" - self.call_stack = None - - def run(self): - if not self.path_check(): - return self.output_format_data - self.process() - self.output() - return self.output_format_data - - def process(self): - csv_analyzer = CSVAnalyzer(self.kernel_details_path) - all_pattern_data = csv_analyzer.process() - all_pattern_data = all_pattern_data.sort_values(by='duration sum(us)', ascending=False) - filter_data = all_pattern_data.get(all_pattern_data.get("duration sum(us)", 0) > 0) - if not self.has_callstack(): - print("[Warning] No call stack info found, advice will be incomplete") - self.cur_data = filter_data - else: - json_analyzer = JSONAnalyzer(self.trace_view_path) - custom_code = json_analyzer.get_custom_code(filter_data, "first_timestamp", "custom code") - self.cur_data = pd.concat([filter_data, custom_code], axis=1) - op_num = len(self.cur_data.index) - op_dur = filter_data["duration sum(us)"].sum() - if op_num > 0: - index = 0 - self.cur_bottleneck = f"The computing time of fusable op is {round(op_dur, 2)} ms." - self.cur_advice = "" - for _, row in self.cur_data.iterrows(): - advice = f"Advice {index}:\n" - cur_op = "[" + ", ".join(row.loc["pattern"]) + "]" - npu_fused_op = row.loc["pattern_name"] - advice += f"Replace {cur_op} with {npu_fused_op}. 
" - if self.call_stack: - advice += f"This pattern first happened in: \n{row['custom code']}" - if index != op_num - 1: - advice += "\n" - index += 1 - self.cur_advice += advice diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py b/profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py deleted file mode 100644 index caff1c792c..0000000000 --- a/profiler/advisor/advisor_backend/compute_advice/npu_slow_advice.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from abc import ABC -import multiprocessing - -import pandas as pd - -from compute_advice.compute_advice_base import ComputeAdviceBase -from compute_advice.npu_fused.op_perf import OpPerfFactory -from common_func_advisor.constant import Constant -from common_func_advisor.constant import PerfColor -from advisor_backend.common_func_advisor.trace_view_json import TraceViewJson - - -class NpuSlowAdvice(ComputeAdviceBase, ABC): - OP_PERF_SHEET = "op_perf" - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.kernel_details_path = "" - self.data = pd.DataFrame() - - @staticmethod - def save_to_excel(data: pd.DataFrame, file_path: str) -> None: - writer = pd.ExcelWriter(file_path, engine="xlsxwriter", mode="w") - data.index.name = Constant.TITLE.INDEX - data.to_excel(writer, index=True, sheet_name=NpuSlowAdvice.OP_PERF_SHEET) - NpuSlowAdvice.color_sheet(data, writer.book, writer.sheets[NpuSlowAdvice.OP_PERF_SHEET]) - writer.sheets[NpuSlowAdvice.OP_PERF_SHEET].freeze_panes = "A2" - writer.close() - - @staticmethod - def color_sheet(data: pd.DataFrame, workbook, worksheet): - color_rgb = { - PerfColor.GREEN.name: workbook.add_format({'bg_color': '#C6EFCE'}), - PerfColor.YELLOW.name: workbook.add_format({'bg_color': '#FFEB9C'}), - PerfColor.RED.name: workbook.add_format({'bg_color': '#FFC7CE'}), - } - for row in data.iterrows(): - color = row[1][Constant.TITLE.COLOR] - fill_format = color_rgb.get(color) - if not fill_format: - continue - worksheet.set_row(row[0] + 1, None, fill_format) - - @staticmethod - def update_op_row(row: tuple): - return OpPerfFactory.build(row[1]).update() - - def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: - if not self.has_callstack(): - print("There is no call stack info, please set 'with_stack=True'") - return "" - trace_json = TraceViewJson(self.trace_view_path) - return trace_json.get_call_stack(data, index_id, ts_col) - - def run(self): - if not self.path_check(): - return 
self.data - self.process() - return self.data - - def process(self): - self.data = pd.read_csv(self.kernel_details_path, dtype={"Start Time(us)": str}) - # 去除末尾的\t分隔符 - self.data["Start Time(us)"] = self.data["Start Time(us)"].apply(lambda x: x[:-1]) - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - result = pool.map(self.update_op_row, self.data.iterrows()) - pool.close() - self.data = pd.DataFrame(result) diff --git a/profiler/advisor/advisor_backend/interface.py b/profiler/advisor/advisor_backend/interface.py deleted file mode 100644 index 3e20c26d4d..0000000000 --- a/profiler/advisor/advisor_backend/interface.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys - -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "advisor_backend")) -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "compare_tools")) -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "cluster_analyse")) -from common_func_advisor.constant import Constant -from advisor_backend.advice_factory.cluster_advice_factory import ClusterAdviceFactory -from advisor_backend.advice_factory.compute_advice_factory import ComputeAdviceFactory -from advisor_backend.advice_factory.timeline_advice_factory import TimelineAdviceFactory -from advisor_backend.advice_factory.overall_advice_factory import OverallAdviceFactory - - -class Interface: - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - self._factory_controller = FactoryController(collection_path) - - def get_data(self: any, mode: str, advice: str, **kwargs): - if len(mode) > Constant.MAX_INPUT_MODE_LEN or len(advice) > Constant.MAX_INPUT_ADVICE_LEN: - msg = '[ERROR]Input Mode is illegal.' 
- raise RuntimeError(msg) - factory = self._factory_controller.create_advice_factory(mode, kwargs.get("input_path", "")) - return factory.produce_advice(advice, kwargs) - - -class FactoryController: - FACTORY_LIB = { - Constant.CLUSTER: ClusterAdviceFactory, - Constant.COMPUTE: ComputeAdviceFactory, - Constant.TIMELINE: TimelineAdviceFactory, - Constant.OVERALL: OverallAdviceFactory - } - - def __init__(self, collection_path: str): - self.collection_path = os.path.realpath(collection_path) - self.temp_input_path = None - - def create_advice_factory(self, mode: str, input_path: str): - collection_path = input_path if input_path else self.collection_path - return self.FACTORY_LIB.get(mode)(collection_path) - - -if __name__ == "__main__": - Interface() diff --git a/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py b/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py deleted file mode 100644 index 9fb347d823..0000000000 --- a/profiler/advisor/advisor_backend/overall_advice/overall_summary_advice.py +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os - -from advisor_backend.advice_base import AdviceBase -from compare_backend.utils.constant import Constant -from compare_interface.comparison_interface import ComparisonInterface - - -class OverallSummaryAdvice(AdviceBase): - advice_map = { - "Computing Time": "if you want more detailed advice please go to compute_perf_analysis.ipynb.", - "Uncovered Communication Time": "if you want more detailed advice please go to cluster_perf_analysis.ipynb.", - "Free Time": "if you want more detailed advice please go to timeline_perf_analysis.ipynb." - } - time_name_map = { - "Computing Time": "computing", - "Uncovered Communication Time(Wait Time)": "communication", - "Free Time": "free", - 'Cube Time(Num)': 'Cube Time', - 'Vector Time(Num)': 'Vector Time', - 'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)', - 'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)', - 'Other Time': "Other Computing Time", - 'SDMA Time(Num)': 'SDMA Time' - } - performance_time_dict = { - "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)', - 'Flash Attention Time(Backward)(Num)', 'Other Time'], - "Uncovered Communication Time(Wait Time)": [], - "Free Time": ['SDMA Time(Num)'] - } - - def __init__(self, collection_path: str, kwargs: dict): - super().__init__(collection_path) - self.base_collection_path = kwargs.get("base_collection_path", "") - self._has_base_collection = False - self._is_minimal_profiling = False - self.cur_data = {} - self.cur_bottleneck = {} - self.cur_advices = "" - self._headers = [] - self._base_data = [] - self._comparison_data = [] - - @staticmethod - def split_duration_and_num(time_value: str) -> tuple: - split_data = time_value.split("s") # time value example: 0.229s(1756) - duration, num = 0.0, None - if len(split_data) >= 2: - try: - num = int(split_data[1].strip("()")) - except ValueError: - pass - if len(split_data) >= 1: - try: - duration = float(split_data[0]) - except 
ValueError: - print(f"[WARNING] Invalid time value: {time_value}.") - return duration, num - - @staticmethod - def calculate_ratio(dividend, divisor): - if not divisor: - return float("inf") - return dividend / divisor - - def run(self): - if self.path_check(): - self.process() - self.output() - self.identify_bottleneck() - return self.output_format_data - - def path_check(self): - if self.base_collection_path: - if os.path.exists(self.base_collection_path): - self._has_base_collection = True - else: - print(f"[WARNING] Invalid path which not exists: {self.base_collection_path}.") - return os.path.exists(self.collection_path) - - def process(self): - base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path - result_data = ComparisonInterface(base_collection_path, self.collection_path).compare(Constant.OVERALL_COMPARE) - for data in result_data.values(): - self._headers = data.get("headers", []) - rows = data.get("rows", []) - if len(rows) == 2: - self._base_data = rows[0] - self._comparison_data = rows[1] - if not self._headers or not self._comparison_data: - return - self._is_minimal_profiling = 'E2E Time(Not minimal profiling)' not in self._headers - if self._has_base_collection: - self.cur_data["comparison_result"] = result_data - time_category_dict = {} - for time_category, time_list in self.performance_time_dict.items(): - time_value = self.get_time_value(time_category, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - duration, _ = self.split_duration_and_num(time_value) - time_category = time_category.split("(")[0] - time_category_dict[time_category] = duration - self.get_sub_category_time(time_category, time_list, duration) - self.cur_data["overall_data"] = time_category_dict - - def get_time_value(self, header_name: str, data_list: list): - try: - data_index = self._headers.index(header_name) - except ValueError: - return Constant.INVALID_VALUE - try: - time_value = 
data_list[data_index] - except IndexError: - return Constant.INVALID_VALUE - return time_value - - def get_sub_category_time(self, category: str, time_list: list, total_duration: float): - sub_time_dict = {} - for time_name in time_list: - time_value = self.get_time_value(time_name, self._comparison_data) - if time_value == Constant.INVALID_VALUE: - continue - sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, "")) - duration, num = self.split_duration_and_num(time_value) - sub_time_dict.setdefault(f"Duration(s)", []).append(duration) - sub_time_dict.setdefault(f"Duration Ratio", []).append( - "{:.2%}".format(self.calculate_ratio(duration, total_duration))) - sub_time_dict.setdefault(f"Kernel Number", []).append(num) - self.cur_data[self.time_name_map.get(category)] = sub_time_dict - - def identify_bottleneck(self): - overall_data = self.cur_data.get("overall_data") - if not overall_data: - return - e2e_time = sum([data for data in overall_data.values()]) - overall_bottleneck = f"The Model E2E Time is {e2e_time}s.\n" - comparison_bottleneck = "" - for time_type, time_value in overall_data.items(): - # add subtype time bottleneck - advice = self.advice_map.get(time_type, "") - self.cur_bottleneck[self.time_name_map.get(time_type)] = f"{time_type} is {time_value}s.\n{advice}" - # add overall bottleneck - overall_bottleneck += f" -- {time_type} is {time_value}s\n" - if time_type == "Free Time" and self._is_minimal_profiling and self.calculate_ratio(time_value, - e2e_time) > 0.1: - overall_bottleneck += "percentage of free time exceed the threshold 10%." 
- if not self._has_base_collection: - continue - # add comparison bottleneck - base_duration, _ = self.split_duration_and_num(self.get_time_value(time_type, self._base_data)) - if time_value > base_duration: - ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration)) - comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n" - self.cur_bottleneck["overall_data"] = overall_bottleneck - self.cur_bottleneck["comparison_result"] = comparison_bottleneck - - def output(self): - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - self.output_format_data[self.ADVICE] = self.cur_advices diff --git a/profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py b/profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/advisor_backend/prof_bean_advisor/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py b/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py deleted file mode 100644 index b108fc77a3..0000000000 --- a/profiler/advisor/advisor_backend/prof_bean_advisor/cluster_step_trace_time_bean.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class ClusterStepTraceTimeBean: - STEP = "Step" - TYPE = "Type" - INDEX = "Index" - COMPUTING = "Computing" - COMMUNICATION = "Communication(Not Overlapped)" - FREE = "Free" - - def __init__(self, data: dict): - self._data = data - - @property - def step(self) -> str: - return self._data.get(self.STEP, '') - - @property - def type(self) -> str: - return self._data.get(self.TYPE, '') - - @property - def index(self) -> int: - try: - return int(self._data.get(self.INDEX)) - except ValueError as e: - msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Index'." - raise ValueError(msg) from e - - @property - def compute(self) -> float: - try: - return float(self._data.get(self.COMPUTING, '')) - except ValueError as e: - msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Computing'." 
- raise ValueError(msg) from e - - @property - def communication(self) -> float: - try: - return float(self._data.get(self.COMMUNICATION, '')) - except ValueError as e: - msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Communication'." - raise ValueError(msg) from e - - @property - def free(self) -> float: - try: - return float(self._data.get(self.FREE, '')) - except ValueError as e: - msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Free'." - raise ValueError(msg) from e - diff --git a/profiler/advisor/advisor_backend/timeline_advice/__init__.py b/profiler/advisor/advisor_backend/timeline_advice/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py b/profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py deleted file mode 100644 index 9e492b2156..0000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/op_schedule_advice.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from decimal import Decimal -from common_func_advisor.constant import Constant -from timeline_advice.timeline_advice_base import TimelineAdviceBase - - -class OpScheduleAdvice(TimelineAdviceBase): - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.cur_data = list() - self.cur_bottleneck = str() - self.cur_advice = str() - - def run(self): - if not self.path_check(): - return self.output_format_data - self.preparse() - self.process() - self.output() - return self.output_format_data - - def process(self): - cpt_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_CPT] - free_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_FREE] - if not cpt_data or not free_data: - print("[ERROR] Fail to find Overlap data.") - return - - op_dur = [entry.get("dur", 0) for entry in cpt_data] - op_free = [0.0] * len(cpt_data) - merge_data = list() - merge_data.extend(cpt_data) - merge_data.extend(free_data) - merge_data.sort(key=lambda x : Decimal(x.get("ts"))) - idx = free_idx = 0 - while idx < len(merge_data) and free_idx < len(op_free): - entry = merge_data[idx] - entry_name = entry.get("name") - if entry_name == 'Free': - op_free[free_idx] = merge_data[idx].get('dur') - elif entry_name == 'Computing': - free_idx += 1 - idx += 1 - self.cur_data.append(op_dur) - self.cur_data.append(op_free) - free_ratio, cpt_ratio, _ = self.get_ratio() - if free_ratio < 0.2: - return - self.cur_bottleneck = f"NPU Utilication: {round(free_ratio * 100, 2)}%, " \ - f"NPU Free Utilization: {round(cpt_ratio * 100, 2)}%." 
- if len(self.preparse_data[self.PREPARSE_TYPE.SYNCHRONIZE]) > 1: - self.cur_advice = f"Device synchronize {len(self.preparse_data[self.PREPARSE_TYPE.SYNCHRONIZE])} times, " \ - "try to reduce synchronization statements to alleviate the bottleneck of operator delivery.\n" - small_op_num = self.small_op_block(op_free, op_dur) - small_op_ratio = small_op_num / len(op_dur) if op_dur else 0.0 - if small_op_ratio > Constant.SMALL_OP_NUM_RATIO: - self.cur_advice += "There are too many small operators, you can increase the batch size appropriately." - - def small_op_block(self, op_frees, op_durs): - small_op_num = 0 - for op_free, op_dur in zip(op_frees, op_durs): - if op_free > op_dur * Constant.SMALL_OP_DUR_RATIO: - small_op_num += 1 - return small_op_num - - def get_ratio(self): - cpt_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_CPT] - free_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_FREE] - cmu_data = self.preparse_data[self.PREPARSE_TYPE.OVERLAP_CMU] - cpt_time = sum([x.get("dur", 0) for x in cpt_data]) - free_time = sum([x.get("dur", 0) for x in free_data]) - cmu_time = sum([x.get("dur", 0) for x in cmu_data]) - total_time = cpt_time + free_time + cmu_time - if total_time > 0.0: - return (free_time / total_time, cpt_time / total_time, cmu_time / total_time) - return (0.0, 0.0, 0.0) diff --git a/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py b/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py deleted file mode 100644 index dee2e7ba56..0000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from timeline_advice.timeline_advice_base import TimelineAdviceBase - - -class OptimizerAdvice(TimelineAdviceBase): - OPTIMIZER_MAP = { - "Optimizer.step#SGD.step": "torch_npu.optim.NpuFusedSGD", - "Optimizer.step#Adadelta.step": "torch_npu.optim.NpuFusedAdadelta", - "Optimizer.step#Lamb.step": "torch_npu.optim.NpuFusedLamb", - "Optimizer.step#Adam.step": "torch_npu.optim.NpuFusedAdam", - "Optimizer.step#AdamW.step": "torch_npu.optim.NpuFusedAdamW", - "Optimizer.step#AdamP.step": "torch_npu.optim.NpuFusedAdamP", - "Optimizer.step#BertAdam.step": "torch_npu.optim.NpuFusedBertAdam", - "Optimizer.step#RMSprop.step": "torch_npu.optim.NpuFusedRMSprop", - "Optimizer.step#RMSpropTF.step": "torch_npu.optim.NpuFusedRMSpropTF", - } - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.cur_data = list() - self.cur_bottleneck = str() - self.cur_advice = str() - - def run(self): - if not self.path_check(): - return self.output_format_data - self.preparse() - self.process() - self.output() - return self.output_format_data - - def process(self): - if not self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER]: - return - - self.cur_data = list(set([entry.get("name", None) for entry in self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER]])) - for index, opt_name in enumerate(self.cur_data): - self.cur_advice += f"You can choose {self.OPTIMIZER_MAP.get(opt_name)} to replace the current Optimizer: {opt_name}." 
- if index != len(self.cur_data) - 1: - self.cur_advice += "\n" - self.cur_bottleneck = self.cur_advice diff --git a/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py b/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py deleted file mode 100644 index 4c7ac96cd2..0000000000 --- a/profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from abc import abstractmethod -from collections import defaultdict -import json -import os - -from advice_base import AdviceBase -from common_func.file_manager import FileManager - - -class TimelineAdviceBase(AdviceBase): - class PREPARSE_TYPE: - OPTIMIZER = 0 - STEP = 1 - OVERLAP_CPT = 2 - OVERLAP_FREE = 3 - OVERLAP_CMU = 4 - ENQUEUE = 5 - DEQUEUE = 6 - HOST_TO_DEVICE = 7 - SYNCHRONIZE = 8 - - def __init__(self, collection_path: str): - super().__init__(collection_path) - self.trace_view_path = "" - self.has_preparse = False - self.preparse_data = defaultdict(list) - self.entry_map = { - 'Computing': self.PREPARSE_TYPE.OVERLAP_CPT, - 'Free': self.PREPARSE_TYPE.OVERLAP_FREE, - 'AscendCL@aclrtSynchronizeDevice': self.PREPARSE_TYPE.SYNCHRONIZE - } - - def path_check(self): - """ - check whether input path is valid - """ - if not os.path.exists(self.collection_path): - print("[ERROR] Path: {} is not exist.".format(self.collection_path)) - return False - if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"): - self.trace_view_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", "trace_view.json") - if not os.path.exists(self.trace_view_path): - print("[ERROR] trace_view.json is not exist in the Path: {}.".format(os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT"))) - return False - elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path) == "trace_view.json": - self.trace_view_path = self.collection_path - else: - print("[ERROR] Please input ascend_pt or trace_view.json.") - return False - print("[INFO] Start to analyse the target file: {}".format(self.trace_view_path)) - return True - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - @abstractmethod - def output(self): - """ - output relevant data - """ - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - 
self.output_format_data[self.ADVICE] = self.cur_advice - - def preparse(self): - if self.has_preparse: - return - json_reader = FileManager.read_json_file(self.trace_view_path) - if not isinstance(json_reader, list): - return - for entry in json_reader: - name = entry.get("name", None) - if not name: - continue - if name.startswith("Optimizer.step#") and name.endswith(".step"): - self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER].append(entry) - elif name.startswith("ProfilerStep#"): - self.preparse_data[self.PREPARSE_TYPE.STEP].append(entry) - elif name in self.entry_map: - self.preparse_data[self.entry_map[name]].append(entry) - self.has_preparse = True diff --git a/profiler/advisor/advisor_backend/overall_advice/__init__.py b/profiler/advisor/analyzer/__init__.py similarity index 100% rename from profiler/advisor/advisor_backend/overall_advice/__init__.py rename to profiler/advisor/analyzer/__init__.py diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py new file mode 100644 index 0000000000..f698865266 --- /dev/null +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -0,0 +1,16 @@ +from abc import abstractmethod, ABCMeta + + +class BaseAnalyzer(metaclass=ABCMeta): + + @abstractmethod + def optimize(self): + pass + + @abstractmethod + def make_record(self): + pass + + @abstractmethod + def make_render(self): + pass diff --git a/profiler/advisor/analyzer/communication/__init__.py b/profiler/advisor/analyzer/communication/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/communication/bandwidth/__init__.py b/profiler/advisor/analyzer/communication/bandwidth/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/communication/environment/__init__.py b/profiler/advisor/analyzer/communication/environment/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computing/__init__.py 
b/profiler/advisor/analyzer/computing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computing/aicpu/__init__.py b/profiler/advisor/analyzer/computing/aicpu/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computing/bound/__init__.py b/profiler/advisor/analyzer/computing/bound/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/computing/op_compile/__init__.py b/profiler/advisor/analyzer/computing/op_compile/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/dataloader/__init__.py b/profiler/advisor/analyzer/dataloader/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/overall/__init__.py b/profiler/advisor/analyzer/overall/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/scheduling/__init__.py b/profiler/advisor/analyzer/scheduling/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/scheduling/free_event/__init__.py b/profiler/advisor/analyzer/scheduling/free_event/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py b/profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/cluster_perf_analysis.ipynb b/profiler/advisor/cluster_perf_analysis.ipynb deleted file mode 100644 index 39e389dd3a..0000000000 --- a/profiler/advisor/cluster_perf_analysis.ipynb +++ /dev/null @@ -1,625 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "id": "initial_id", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T13:31:25.022339600Z", - "start_time": "2023-11-21T13:31:25.016155200Z" - } - }, - "outputs": [], - "source": [ - "from 
advisor_backend.interface import Interface\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "id": "57d17a21205c3c5e", - "metadata": { - "collapsed": false - }, - "source": [ - "# 集群调优分析\n", - "## 1. 集群分析的数据准备\n", - "首先我们当前支持PyTorch多卡大模型的集群分析,您需要输入集群分析的profiling_path路径,例如: \n", - "--{profiling_path} \n", - " -- xxxx_ascend_pt \n", - " -- xxxx_ascend_pt \n", - " -- xxxx_ascend_pt \n", - " ...... \n", - " -- xxxx_ascend_pt \n", - "里面每张卡的profiling文件都是ascend_pt结尾的文件。 \n", - "\n", - "## 2. 集群分析解决的问题 \n", - "当前的功能主要有四项: \n", - "1). 识别多卡间的计算慢卡(根据计算时间等推断) \n", - "2). 识别多卡间的通信慢现象(根据通信链路的带宽判断) \n", - "3). 对多卡间的计算算子进行统计展示(识别不同卡的算子差异) \n", - "4). 展示集群流水并行图(根据时间轴展示多卡间的计算和通信时间) " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "36b7a24cc7ca5da2", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:38.379699800Z", - "start_time": "2023-11-21T12:53:38.363755900Z" - }, - "collapsed": false - }, - "outputs": [], - "source": [ - "# EDIT THE PROFILING DATA PATH\n", - "cluster_path = \"YOUR PATH\"\n", - "interface = Interface(cluster_path)" - ] - }, - { - "cell_type": "markdown", - "id": "cf832ac2e0dfa30f", - "metadata": { - "collapsed": false - }, - "source": [ - "## 1) 识别慢卡" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "40aac93278dd6e34", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:41.815599700Z", - "start_time": "2023-11-21T12:53:41.783393700Z" - }, - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO]Cluster has been analyzed because of the existence of cluster analysis output directory.\n", - "[INFO]Skip Cluster analyze backend.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('cluster', 'slow rank')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "cd3fceda-49f0-439f-9c54-cc31490fc99e", - "metadata": {}, - "outputs": [], - "source": [ - "# EDIT THE 
DATA TO SHOW WHAT YOU WANT\n", - "data = dataset.get('data')\n", - "words = dataset.get('bottleneck')\n", - "rank_ids = list(data.keys())\n", - "# 柱状图显示属性\n", - "compute_time = [data.get(key, {})[0] for key in rank_ids]\n", - "communication_time = [data.get(key, {})[1] for key in rank_ids]\n", - "free_time = [data.get(key, {})[2] for key in rank_ids]\n", - "# 柱宽\n", - "width = 0.2\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6a1d82fb-a31b-49ab-a859-6d4bb898c512", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Communication has some issues in the cluster, because the max difference of Communication time has reached 88.476ms. \n", - "Free has some issues in the cluster, because the max difference of Free time has reached 29.224ms. \n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 设置展示图大小\n", - "fig, ax = plt.subplots(figsize=(10,8))\n", - "\n", - "x = np.arange(len(rank_ids)) # the label locations\n", - "\n", - "rects1 = ax.bar(x - width, compute_time, width, label='Computing')\n", - "rects2 = ax.bar(x, communication_time, width, label='Communication')\n", - "rects3 = ax.bar(x + width, free_time, width, label='Free')\n", - "\n", - "\n", - "# Add some text for labels, title and custom x-axis tick labels, etc.\n", - "ax.set_ylabel('Time(us)')\n", - "ax.set_xlabel('Rank ID')\n", - "ax.set_title('Step Time')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(rank_ids)\n", - "ax.legend()\n", - "print(words)" - ] - }, - { - "cell_type": "markdown", - "id": "3511befaff513e8e", - "metadata": { - "collapsed": false - }, - "source": [ - "## 2)识别通信链路慢" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "2a1e617d2a117125", - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO]Cluster has been analyzed because of the existence of cluster analysis output directory.\n", - "[INFO]Skip Cluster analyze backend.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('cluster', 'slow link')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c8bca314-a8da-4a5b-985a-c36f00154552", - "metadata": {}, - "outputs": [], - "source": [ - "# EDIT THE DATA TO SHOW WHAT YOU WANT\n", - "data = dataset.get('data')\n", - "words = dataset.get('bottleneck')\n", - "rank_ids = list(data.keys())\n", - "# 柱状图显示属性\n", - "sdma_bw = [data.get(key, {}).get(\"SDMA bandwidth(GB/s)\") for key in rank_ids]\n", - "rdma_bw = [data.get(key, {}).get(\"RDMA bandwidth(GB/s)\") for key in rank_ids]\n", - "# 柱宽\n", - "width = 0.4" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "99ef04c9-ec07-4790-bbb6-0de9bf6c99d0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", 
- "output_type": "stream", - "text": [ - "RDMA bandwidth(GB/s): \n", - "The average is 0.041, while the maximum is 0.041GB/s and the minimum is 0.041GB/s. the difference is 0.0GB/s. \n", - "SDMA bandwidth(GB/s): \n", - "The average is 0.054, while the maximum is 0.056GB/s and the minimum is 0.052GB/s. the difference is 0.003GB/s. \n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 设置展示图大小\n", - "fig, ax = plt.subplots(figsize=(10,8))\n", - "\n", - "x = np.arange(len(rank_ids)) # the label locations\n", - "\n", - "rects1 = ax.bar(x - width/2, sdma_bw, width, label='SDMA')\n", - "rects2 = ax.bar(x + width/2, rdma_bw, width, label='RDMA')\n", - "\n", - "# Add some text for labels, title and custom x-axis tick labels, etc.\n", - "ax.set_ylabel('Bandwidth(GB/s)')\n", - "ax.set_xlabel('Rank ID')\n", - "ax.set_title('Transport Bandwidth')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(rank_ids)\n", - "ax.legend()\n", - "print(words)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "77d6efa1-48e3-409f-82c4-3e2b3d868898", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RDMA bandwidth(GB/s): \n", - "The average is 0.041, while the maximum is 0.041GB/s and the minimum is 0.041GB/s. the difference is 0.0GB/s. \n", - "SDMA bandwidth(GB/s): \n", - "The average is 0.054, while the maximum is 0.056GB/s and the minimum is 0.052GB/s. the difference is 0.003GB/s. \n" - ] - } - ], - "source": [ - "print(dataset.get('bottleneck'))" - ] - }, - { - "cell_type": "markdown", - "id": "ce27a1d3-1354-45f7-88d8-dcb8e438b2b2", - "metadata": {}, - "source": [ - "## 3) 分布式卡上的kernel算子统计展示" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e05774e9-c47e-400f-8421-b4b71bcdcbc4", - "metadata": {}, - "outputs": [], - "source": [ - "dataset = interface.get_data('cluster', 'kernel')" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "e95b6849-1738-4975-929f-734edff5d1c1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
rank idNameInput ShapesInput Data TypesOutput ShapesDuration(us)_meanDuration(us)_varDuration(us)_maxDuration(us)_minDuration(us)_countDuration(us)_sum
00Add\"1024,2,5120;1024,2,5120\"DT_BF16;DT_BF16\"1024,2,5120\"45.01205082.95274855.925535.310816720.1928
10Add\"2,8192,5120;2,8192,5120\"DT_BF16;DT_BF16\"2,8192,5120\"447.183700NaN447.1837447.18371447.1837
20Add\"8192,2,1920;1920\"DT_BF16;DT_BF16\"8192,2,1920\"54.3308501.34284655.245652.64634217.3234
30Add\"8192,2,2560;2560\"DT_BF16;DT_BF16\"8192,2,2560\"75.4853750.76131576.280274.24074301.9415
40Add\";\"FLOAT;FLOAT\"\"1.2008840.0172571.49960.95975060.0442
....................................
144115atomic_memset-1_67_1998432_1_0\"\"UNDEFINED\"\"3.160000NaN3.16003.160013.1600
144215trans_Cast_14\"1\"FLOAT\"1\"1.3900000.0230671.60001.260045.5600
144315trans_Cast_15\"\"INT32\"\"64.44500036.27610070.300059.20004257.7800
144415trans_Cast_4\"1\"FLOAT\"1\"1.5550000.0358571.94001.3200812.4400
144515trans_Cast_5\"\"INT32\"\"62.89500015.58420069.860056.76008503.1600
\n", - "

1446 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " rank id Name Input Shapes \\\n", - "0 0 Add \"1024,2,5120;1024,2,5120\" \n", - "1 0 Add \"2,8192,5120;2,8192,5120\" \n", - "2 0 Add \"8192,2,1920;1920\" \n", - "3 0 Add \"8192,2,2560;2560\" \n", - "4 0 Add \";\" \n", - "... ... ... ... \n", - "1441 15 atomic_memset-1_67_1998432_1_0 \"\" \n", - "1442 15 trans_Cast_14 \"1\" \n", - "1443 15 trans_Cast_15 \"\" \n", - "1444 15 trans_Cast_4 \"1\" \n", - "1445 15 trans_Cast_5 \"\" \n", - "\n", - " Input Data Types Output Shapes Duration(us)_mean Duration(us)_var \\\n", - "0 DT_BF16;DT_BF16 \"1024,2,5120\" 45.012050 82.952748 \n", - "1 DT_BF16;DT_BF16 \"2,8192,5120\" 447.183700 NaN \n", - "2 DT_BF16;DT_BF16 \"8192,2,1920\" 54.330850 1.342846 \n", - "3 DT_BF16;DT_BF16 \"8192,2,2560\" 75.485375 0.761315 \n", - "4 FLOAT;FLOAT \"\" 1.200884 0.017257 \n", - "... ... ... ... ... \n", - "1441 UNDEFINED \"\" 3.160000 NaN \n", - "1442 FLOAT \"1\" 1.390000 0.023067 \n", - "1443 INT32 \"\" 64.445000 36.276100 \n", - "1444 FLOAT \"1\" 1.555000 0.035857 \n", - "1445 INT32 \"\" 62.895000 15.584200 \n", - "\n", - " Duration(us)_max Duration(us)_min Duration(us)_count Duration(us)_sum \n", - "0 55.9255 35.3108 16 720.1928 \n", - "1 447.1837 447.1837 1 447.1837 \n", - "2 55.2456 52.6463 4 217.3234 \n", - "3 76.2802 74.2407 4 301.9415 \n", - "4 1.4996 0.9597 50 60.0442 \n", - "... ... ... ... ... 
\n", - "1441 3.1600 3.1600 1 3.1600 \n", - "1442 1.6000 1.2600 4 5.5600 \n", - "1443 70.3000 59.2000 4 257.7800 \n", - "1444 1.9400 1.3200 8 12.4400 \n", - "1445 69.8600 56.7600 8 503.1600 \n", - "\n", - "[1446 rows x 11 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "27b75df4-792b-43dc-aa5c-d3c265642c1e", - "metadata": {}, - "outputs": [], - "source": [ - "# 保存到csv查看, 可修改保存路径\n", - "dataset.to_csv('cluster_kernel_details.csv', index=False, sep='\\t')" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 4) 展示集群流水并行图\n", - "使用说明: \n", - "1). 需要使用Ascend Torch Profiler采集数据,如果需要展示FP和BP需要将activities设置为采集CPU和NPU \n", - "2). rank_ids为要展示的rank id列表,必选参数, 可视化顺序与rank_ids的顺序一致 \n", - "3). worker_num为多进程数量,可选参数,请根据机器配置调整,默认值为机器可用核心数的一半 \n", - "4). 如果没有采集CPU数据,则展示Stage和Bubble的流水图 \n", - "5). 生成的json文件可以在chrome trace中查看 \n", - "\n", - "示例图:\n", - "![pipeline_view](../../profiler/test/resource/pipeline_view.png)" - ], - "metadata": { - "collapsed": false - }, - "id": "ae45826394463cc4" - }, - { - "cell_type": "code", - "outputs": [], - "source": [ - "import json\n", - "\n", - "# rank_ids为要呈现的rank id列表,必选参数\n", - "# 可以使用列表推导式生成需要的rank_ids,最终展示顺序和rank_ids的顺序一致\n", - "# worker_num为多进程数量,可选参数,请根据机器配置调整,默认值为机器可用核心数的一半\n", - "dataset = interface.get_data(\"cluster\", \"pipeline\", rank_ids=[0, 1, 2, 3, 4, 5, 6, 7], worker_num=8)\n", - "\n", - "# 保存json数据,在chrome trace中查看\n", - "with open(\"./pipeline_view.json\", \"w\") as f:\n", - " json.dump(dataset.get(\"data\", []), f)" - ], - "metadata": { - "collapsed": false - }, - "id": "baf66781eccfbca1" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": 
"python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/profiler/advisor/common/__init__.py b/profiler/advisor/common/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py new file mode 100644 index 0000000000..9703e78c00 --- /dev/null +++ b/profiler/advisor/common/constant.py @@ -0,0 +1,106 @@ +# timeline +DEQUEUE = "Dequeue" +DEQUEUE_SEP = "@" +ATEN = "aten" +NPU = "npu" +ATEN_SEP = "::" +OPTIMIZER = "Optimizer" +OPTIMIZER_SEP = "#" +OPTIMIZER_STEP = "step" +ENQUEUE = "enqueue" +TORCH_TO_NPU = "torch_to_npu" +OP_COMPILE_NAME = "AscendCL@aclopCompileAndExecute" +OP_COMPILE_ID = "aclopCompileAndExecute" +MAX_OP_COMPILE_NUM = 20 +ACL_TO_NPU = "acl_to_npu" +TASK_TYPE = "Task Type" +CPU_OP = "cpu_op" +AI_CORE = "AI_CORE" +AI_CPU = "AI_CPU" +CALL_STACKS = "Call stack" +INPUT_DIMS = "Input Dims" +OP_SEP = "-" +MA_ADVISOR_MAX_PROCESSES = 16 +MA_ADVISOR_ANALYZE_PROCESSES = "MA_ADVISOR_ANALYZE_PROCESSES" +TIMELINE_OP_STACKS_DATASET = "timeline_op_stacks_dataset" +TIMELINE_BACKWARD_NO_STACK = "Backward broadcast, without call stacks in profiling." +TIMELINE_ACL_TO_NPU_NO_STACK = "Incoming flow is 'acl_to_npu', without call stacks in profiling." +TIMELINE_BACKWARD_NO_STACK_CODE = -1 +TIMELINE_ACL_TO_NPU_NO_STACK_CODE = -2 +TIMELINE_FUSION_OPS_NO_STACK_FLAG = "NO STACK" +NO_STACK_REASON_MAP = { + TIMELINE_BACKWARD_NO_STACK_CODE: "Backward broadcast, without call stacks in profiling.", + TIMELINE_ACL_TO_NPU_NO_STACK_CODE: "Incoming flow is 'acl_to_npu', without call stacks in profiling." 
+} +TIMELINE_API_DOC_URL = "https://3ms.huawei.com/hi/group/3942456/wiki_7680982.html" +AFFINITY_TRAINING_API = "Affinity training api" +TIMELINE_WITH_STACK_DOC_URL = "https://www.hiascend.com/document/detail/zh/canncommercial/" \ + "70RC1/modeldevpt/ptmigr/AImpug_0067.html" +PyTorch_AOE_OPERATOR_TUNE_URL = "https://www.hiascend.com/document/detail/zh/canncommercial/" \ + "70RC1/devtools/auxiliarydevtool/aoe_16_045.html" +MSLite_Infer_AOE_OPEATOR_TUNE_URL = "https://www.mindspore.cn/lite/docs/en/master/use/cloud_infer/converter_tool_ascend.html#aoe-auto-tuning" +ENABLE_COMPILED_TUNE_URL = "https://www.hiascend.com/document/detail/zh/canncommercial/" \ + "70RC1/modeldevpt/ptmigr/AImpug_0059.html" + +ASCEND_PROFILER_URL = "https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/modeldevpt/ptmigr/AImpug_0067.html" +TIMELINE_EMPTY_STACKS_PROMPT = "These APIs have no code stack. If parameter 'with_stack=False' while profiling, " \ + "please refer to {timeline_profiling_doc_url} to set 'with_stack=True'. " \ + "Otherwise, ignore following affinity APIs due to backward broadcast lack of stack." 
+ +CLUSTER_ANALYSIS = "Cluster analysis" +SLOW_RANK_TIME_RATIO_THRESHOLD = 0.05 + +# version_control +CANN_VERSION_C30 = '6.3.RC2' +CANN_VERSION_C13 = '7.0.RC1' +CANN_VERSION_C15 = '7.0.0' +CANN_VERSION_C17 = '8.0.0' +SUPPORTED_CANN_VERSION = [CANN_VERSION_C30, CANN_VERSION_C13, CANN_VERSION_C15, CANN_VERSION_C17] +DEFAULT_CANN_VERSION = CANN_VERSION_C15 +ASCEND_PYTORCH_PROFILER = "ascend_pytorch_proflier" +MSLITE = "mslite" +MSPROF = "msprof" +SUPPORTED_PROFILING_TYPE = [ASCEND_PYTORCH_PROFILER, MSLITE, MSPROF] +DEFAULT_PROFILING_TYPE = ASCEND_PYTORCH_PROFILER +TORCH_VERSION_1_11_0 = '1.11.0' +TORCH_VERSION_2_1_0 = '2.1.0' + +SUPPORTED_TORCH_VERSION = [TORCH_VERSION_1_11_0, TORCH_VERSION_2_1_0] +DEFAULT_TORCH_VERSION = TORCH_VERSION_2_1_0 + +TERMINAL_OUTPUT_HEADERS = ["No.", "Problem", "Description", "Suggestion"] +SKIP_ANALYZE_PROMPT = "Finish analysis, no optimization suggestions" +SKIP_QUERY_PROMPT = "Finish query operator stack, no operators" + +# operator output constant +OPERATOR_OUT_TOPK = 10 +OPERATOR_LIST_UNLIMIT = -1 + +DEFAULT_OPERATOR_TYPE = 'None_type' +DEFAULT_DURATION_ZERO = 0.0 + +ADVISOR_LOG_LEVEL = "ADVISOR_LOG_LEVEL" +DEFAULT_LOG_LEVEL = "INFO" +SUPPORTED_LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + +CLOUD_RULE_REGION_CN_NORTH_9 = "cn-north-9" +CLOUD_RULE_REGION_CN_NORTH_7 = "cn-north-7" +CLOUD_RULE_REGION_CN_SOUTHWEST_2 = "cn-southwest-2" +CLOUD_RULE_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7, CLOUD_RULE_REGION_CN_NORTH_9, CLOUD_RULE_REGION_CN_SOUTHWEST_2] +DEFAULT_CLOUD_RULE_REGION = CLOUD_RULE_REGION_CN_SOUTHWEST_2 + +AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" +FUSSION_PASS_YAML_NAME = "op_fussion_pass.yaml" +TIMELINE_FUSION_OPS_YAML_NAME = "timeline_fusion_ops.yaml" +CLOUD_YAML_NAME_LIST = [AICPU_RULES_YAML_NAME, FUSSION_PASS_YAML_NAME, TIMELINE_FUSION_OPS_YAML_NAME] + +MAX_RETRIES = 3 +TIMEOUT = 3 + +ADVISOR_RULE_PATH = "ADVISOR_RULE_PATH" +CLOUD_RULE_PATH = "rules/cloud/" +DEFAULT_RULE_PATH = "./rules/" + 
+TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID = -1 + +DEFAULT_TEMPLATE_HEADER = "Performance Optimization Suggestions" diff --git a/profiler/advisor/common/module_lib.py b/profiler/advisor/common/module_lib.py new file mode 100644 index 0000000000..697e37f736 --- /dev/null +++ b/profiler/advisor/common/module_lib.py @@ -0,0 +1,87 @@ +import logging + +from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer +from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer + +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + +logger = logging.getLogger() + + +class AnalysisScope: + supported_dims = ["computing", "scheduling", "communication", "overall", "dataloader"] + + @staticmethod + def get_analyzer(dimension, analyzer_name, is_inference=False): + if is_inference: + return getattr(InferenceAnalysisScope, dimension)().get(analyzer_name) + return getattr(TrainAnalysisScope, dimension)().get(analyzer_name) + + @staticmethod + def analyzer_list(dim=None, is_inference=False): + analyzer_list = [] + dims = [dim] if dim else AnalysisScope.supported_dims + for dim in dims: + analyzer_list += list(getattr(InferenceAnalysisScope, dim)().keys()) if is_inference else list( + getattr(TrainAnalysisScope, dim)().keys()) + return analyzer_list + + +class TrainAnalysisScope(AnalysisScope): + + @staticmethod + def computing(): + return dict() + + @staticmethod + def scheduling(): + return dict( + timeline_fusion_ops=TimelineFusionOpsAnalyzer + ) + + @staticmethod + def communication(): + return dict() + + @staticmethod + def overall(): + return dict( + overall_summary=OverallSummaryAnalyzer + ) + + @staticmethod + def dataloader(): + return dict() + + +class InferenceAnalysisScope(AnalysisScope): + @staticmethod + def computing(): + return dict() + + @staticmethod + def scheduling(): + return dict() + + @staticmethod + def communication(): + return dict() + + @staticmethod + def 
overall(): + return dict() + + @staticmethod + def dataloader(): + return dict() + + +class AnalyzerToDataset: + analyzer_to_dataset = { + "overall_summary": [], + "timeline_fusion_ops": [TimelineEventDataset] + } + + @staticmethod + def get_dataset(analyzer_name): + return AnalyzerToDataset.analyzer_to_dataset.get(analyzer_name) diff --git a/profiler/advisor/common/timeline/__init__.py b/profiler/advisor/common/timeline/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/common/timeline/event.py b/profiler/advisor/common/timeline/event.py new file mode 100644 index 0000000000..8eebc550d0 --- /dev/null +++ b/profiler/advisor/common/timeline/event.py @@ -0,0 +1,23 @@ +class AdvisorDict(dict): + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, d): + self.__dict__.update(d) + + def __getattr__(self, key: str): + if key not in self: + return {} + + value = self[key] + if isinstance(value, dict): + value = AdvisorDict(value) + return value + + +class TimelineEvent(AdvisorDict): + + def ts_include(self, event): + + return float(self.ts) <= float(event.ts) and float(self.ts) + float(self.dur) >= float(event.ts) + float( + event.dur) \ No newline at end of file diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py new file mode 100644 index 0000000000..19a86437e0 --- /dev/null +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -0,0 +1,555 @@ +import copy +import logging +import os + +import yaml + +from profiler.advisor.common import constant as const +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class TimelineOpRuleHandler: + """基于线性规划思想保存OpRule,用于局部继承、全局继承等功能""" + + def __init__(self): + self._db_content = None + # 具体生成的timeline规则,key为unique_id + self._all_tmp_timeline_op_rule = {} + # 所有timeline规则的dict集合,key为unique_id + self._all_origin_timeline_op_rule_dict = 
{} + # 已生成timeline规则的id数组 + self._exist_timeline_op_rule_unique_id_list = [] + + @staticmethod + def _get_local_inherit_id_list(op_rule: dict): + local_inherit_id_list = [] + for _, val in op_rule.items(): + if val.get("inherit_unique_id") is not None: + local_inherit_id_list.append(val.get("inherit_unique_id")) + return local_inherit_id_list + + @staticmethod + def _is_duplicated_element_in_lists(list_a, list_b): + """检查两个数组中是否存在重复的元素,若有任意元素重复,返回True""" + if not isinstance(list_a, list): + list_a = [list_a] + if not isinstance(list_b, list): + list_b = [list_b] + for element in list_a: + if element in list_b: + return True + return False + + def set_db_content(self, db_content): + # 过滤非 dict 格式, 或 dict 中没有定义 unique_id 的数据, 并保存到 _all_origin_timeline_op_rule_dict 中 + self._db_content = copy.deepcopy(db_content) + for rule_dic in self._db_content: + if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: + continue + self._all_origin_timeline_op_rule_dict[rule_dic.get("unique_id")] = rule_dic + if self._all_origin_timeline_op_rule_dict: + self.generate_all_timeline_op_rule() + + def generate_basic_timeline_op_rules(self): + """用于实现获取无全局继承规则, 无全局继承的规则认为是基础版本规则, 默认不会存在局部继承""" + for _, rule_dic in self._all_origin_timeline_op_rule_dict.items(): + if rule_dic.get("inherit_unique_id") is None: + self.add_basic_timeline_op_rule(rule_dic) + + def add_basic_timeline_op_rule(self, rule_dic): + # 若基础规则中存在局部继承的规则,则跳过 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + return + + temp_rule = OpRule() + temp_rule.merge(rule_dic.get("operator_rules")) + + unique_id = rule_dic.get("unique_id") + logger.debug("The rule of version %s is basic rule.", unique_id) + self.add_new_timeline_op_rule(unique_id, temp_rule.tmp_rule) + + def add_empty_timeline_op_rule(self, unique_id): + if self._all_origin_timeline_op_rule_dict.get(unique_id) is None: + self._all_origin_timeline_op_rule_dict[unique_id] = {} + 
tmp_rule = {} + logger.debug("The rule of version %s is empty.", unique_id) + self.add_new_timeline_op_rule(unique_id, tmp_rule) + + def add_new_timeline_op_rule(self, unique_id, tmp_rule): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + self._exist_timeline_op_rule_unique_id_list.append(unique_id) + self._all_tmp_timeline_op_rule[unique_id] = tmp_rule + logger.debug("The rule of version %s is successfully generated.", unique_id) + + def generate_specified_list_timeline_op_rule(self, specified_unique_id_list, kid_id_list=None): + for specified_unique_id in specified_unique_id_list: + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + self.generate_specified_timeline_op_rule(specified_unique_id, kid_id_list) + + def generate_specified_timeline_op_rule(self, specified_unique_id, kid_id_list=None): + """用于实现生成特定版本规则 + + 若不存在相应specified_unique_id的规则、或是已生成、循环继承等情况,将该规则置空并返回 + 规则库文件结构设置为多叉树, 结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成, + 直到specified_unique_id规则依赖继承的规则库全部生成完毕, 再生成该指定规则库, 将specified_unique_id的规则库归档 + + 参数: + specified_unique_id: 指定版本规则id + kid_id_list: 子规则id数组, 用于防止循环继承, 如间接继承自身或直接继承自身等情况 + 返回: + None + """ + if kid_id_list is None: + kid_id_list = [] + + # 若该unique_id规则在timeline_fusion_ops.yaml中没有相应的规则, 生成该id规则,置为空 + if self._all_origin_timeline_op_rule_dict.get(specified_unique_id) is None: + logger.warning("The specified version %s does not exist in the rule library. " + "Ensure that the corresponding rule is configured in the YAML file. " + "The version %s is left blank.", + specified_unique_id, + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + # 若该unique_id规则已经生成,则无需再次生成 + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + logger.warning("The rule has been generated and does not need to be generated again. 
" + "Check whether unique id %s in the YAML file is duplicate.", + specified_unique_id) + return + + # 若kid_id_list不为空,且间接继承自身,则尝试生成空规则用于继承 + if kid_id_list and self._is_duplicated_element_in_lists(specified_unique_id, kid_id_list): + logger.warning("It cannot be inherited indirectly. Ensure that the corresponding rules are correctly " + "configured in the YAML file and leave Version %s blank.", + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + rule_dic = self._all_origin_timeline_op_rule_dict.get(specified_unique_id) + if rule_dic is not None: + kid_id_list.append(specified_unique_id) + + global_inherit_id = rule_dic.get("inherit_unique_id") + if global_inherit_id and global_inherit_id not in self._exist_timeline_op_rule_unique_id_list: + logger.debug("The rule of version %s global inherit the rule of version %s", + specified_unique_id, global_inherit_id) + self.generate_specified_timeline_op_rule(global_inherit_id, kid_id_list) + + # 若局部继承的规则未生成, 生成该规则 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + logger.debug("The rule of version %s local inherit the rule of version %s", + specified_unique_id, local_inherit_id_list) + self.generate_specified_list_timeline_op_rule(specified_unique_id_list=local_inherit_id_list, + kid_id_list=kid_id_list) + logger.debug("Start to generate rule of version %s", specified_unique_id) + # 实现全局继承与局部继承 + temp_rule = OpRule(timeline_op_rule_handler=self, + rule=self._all_tmp_timeline_op_rule.get(global_inherit_id)) + temp_rule.merge(rule_dic.get("operator_rules")) + # 将生成的规则归档保存 + self.add_new_timeline_op_rule(specified_unique_id, temp_rule.tmp_rule) + return + logger.error("Failed to generate the rule whose unique_id is %s. 
Ensure that the rule is configured in " + "the YAML file and the version %s is empty.", specified_unique_id, specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + + def generate_all_timeline_op_rule(self): + """用于实现获取所有版本规则 + + 查找db_content中的规则库, 规则库文件结构设置为多叉树, 优先生成无继承的基础规则版本 + 循环并生成其他版本, 文件结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成,直到全部规则库生成后退出函数 + + 参数: + None + 返回: + None + """ + self.generate_basic_timeline_op_rules() + _unique_id_list = copy.deepcopy(list(self._all_origin_timeline_op_rule_dict.keys())) + for unique_id in _unique_id_list: + if unique_id in self._exist_timeline_op_rule_unique_id_list: + continue + self.generate_specified_timeline_op_rule(unique_id) + + def get_tmp_timeline_op_rule_with_unique_id(self, unique_id): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + logger.error("The specified unique_id does not exist in the rule library. Ensure that the " + "corresponding rule is configured in the YAML file and the version %s is empty." + "If the value of unique_id is a negative number, the version may not be supported.", + unique_id) + self.add_empty_timeline_op_rule(unique_id) + if unique_id < 0: + logger.error("Advise to use a positive integer as the unique id of rules. " + "Negative numbers: %s are not recommended to use as unique id. 
" + "If specified invalid unique id: %s is used, an empty rule is returned by default.", + unique_id, const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + return self._all_tmp_timeline_op_rule.get(unique_id) + + +class OpRule: + + def __init__(self, rule=None, timeline_op_rule_handler=None): + if rule is None: + self._tmp_rule = {} + else: + self._tmp_rule = copy.deepcopy(rule) + if timeline_op_rule_handler is None: + self.timeline_op_rule_handler = {} + else: + self.timeline_op_rule_handler = copy.deepcopy(timeline_op_rule_handler) + self._rule = {} + + @property + def tmp_rule(self): + return self._tmp_rule + + @staticmethod + def _format_rule(rule): + """格式化规则函数, 将额外规则格式化为{key,数组list}形式, 使得yaml文件中operator_rules若写成key:str形式也能正常读取""" + format_rule = {} + for key, val in rule.items(): + if not isinstance(val, list): + val = [val] + format_rule[key] = val + return format_rule + + def merge(self, extra_rule): + """合并函数, 将已有规则库与额外规则合并, 若无继承则已有规则库应为空""" + for key, val in extra_rule.items(): + for func, op_rules in val.items(): + try: + getattr(self, f"{func}")(key, op_rules) + except AttributeError: + logger.error("Undefined field and function name. 
Ensure that %s is correct in the rule " + "library.", func) + + def get_final_rules(self): + """获取最终的规则库""" + self._restore_rule() + return self._rule + + def add(self, key, add_rules: dict): + """新增函数, 新增已有规则库不存在的额外规则""" + if add_rules is None: + return + if self._tmp_rule.get(key) is None: + self._tmp_rule[key] = {} + format_add_rule = self._format_rule(add_rules) + for add_key, add_val in format_add_rule.items(): + logger.debug("add: %s: %s", add_key, add_val) + if add_key not in self._tmp_rule: + self._tmp_rule[key][add_key] = add_val + else: + logger.warning("This key has been written to the rule, " + "%s: %s should be written in the overwrite section", add_key, add_val) + self._tmp_rule[key][add_key].update(add_val) + + def overwrite(self, key, overwrite_rules: dict): + """重写函数, 重写已有规则库中已经存在的规则""" + if overwrite_rules is None: + return + if self._tmp_rule.get(key) is None: + self._tmp_rule[key] = {} + format_overwrite_rules = self._format_rule(overwrite_rules) + for overwrite_key, overwrite_val in format_overwrite_rules.items(): + logger.debug("overwrite: %s: %s", overwrite_key, overwrite_val) + if overwrite_key not in self._tmp_rule: + logger.warning("This key is not written to the rule. " + "%s: %s should be written in the add section", overwrite_key, overwrite_val) + self._tmp_rule[key][overwrite_key] = overwrite_val + else: + self._tmp_rule[key][overwrite_key].update(overwrite_val) + + def exclude(self, key, exclude_rules: list): + """除外函数, 将已有规则库已有的规则除外删除""" + if exclude_rules is None: + return + for exclude_key in exclude_rules: + logger.debug("exclude: %s", exclude_key) + if isinstance(exclude_key, str): + if exclude_key not in self._tmp_rule[key]: + logger.warning("This key is not written to the rule. 
" + "do not need to exclude: %s.", exclude_key) + continue + self._tmp_rule[key].pop(exclude_key) + else: + logger.warning("Error type rule in exclude: %s", exclude_key) + + def inherit_unique_id(self, key, inherit_unique_id): + """局部继承函数, 将规则库中指定unique_id版本覆盖指定位置""" + result_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(inherit_unique_id) + if result_rule is not None and result_rule.get(key) is not None: + self._tmp_rule[key] = copy.deepcopy(result_rule.get(key)) + return + logger.error("Rule library version %s does not exist. ", inherit_unique_id) + + def _restore_rule(self): + for key, op_api_map in self._tmp_rule.items(): + self._rule[key] = [{op_combined: api} for op_combined, api in op_api_map.items()] + + +def get_file_path_by_walk(root, filename): + file_path = "" + for root, _, files in os.walk(root, topdown=True): + for name in files: + if name == filename: + file_path = os.path.join(root, name) + return file_path + return file_path + + +def get_timeline_fusion_ops_yaml_path(): + # 环境变量 ADVISOR_RULE_PATH 不为空且该路径存在, os.walk遍历其下文件, 若存在相应的规则文件则返回路径 + advisor_rule_path = os.getenv(const.ADVISOR_RULE_PATH) + if advisor_rule_path and os.path.exists(advisor_rule_path): + specified_file_path = get_file_path_by_walk(advisor_rule_path, const.TIMELINE_FUSION_OPS_YAML_NAME) + if len(specified_file_path.strip()) and os.path.exists(specified_file_path): + logger.debug("Successfully find The %s file which is specified by the environment variable: %s.", + specified_file_path, const.ADVISOR_RULE_PATH) + return specified_file_path + logger.warning("The %s does not exist in path: %s. 
Try to use cloud or default local YAML file.", + const.TIMELINE_FUSION_OPS_YAML_NAME, os.path.normpath(advisor_rule_path)) + # 检查云文件默认保存路径文件夹下是否存在相应文件, 默认路径 ~/rules/cloud/ + cloud_file_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH, const.TIMELINE_FUSION_OPS_YAML_NAME) + if os.path.exists(cloud_file_path): + logger.debug("Successfully find The cloud %s file in %s.", const.TIMELINE_FUSION_OPS_YAML_NAME, + cloud_file_path) + return cloud_file_path + # 检查本地默认文件 + local_file_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), + const.DEFAULT_RULE_PATH, const.TIMELINE_FUSION_OPS_YAML_NAME) + if not os.path.exists(local_file_path): + # 若本地默认文件不存在, 则log异常信息并 + logger.error("The default local YAML file does not exist. Please check the YAML file in the default path %s.", + local_file_path) + return local_file_path + + +class FusionOperatorDB: + + def __init__(self, file_path=None, cann_version=None, torch_version=None): + self.timeline_fusion_ops_yaml_path = os.path.normpath(get_timeline_fusion_ops_yaml_path()) + + self.cann_version = cann_version or const.DEFAULT_CANN_VERSION + self.torch_version = torch_version or const.DEFAULT_TORCH_VERSION + + self._supported_version_dict = {} + + self.is_empty = False + self.timeline_op_rule_handler = TimelineOpRuleHandler() + self.fusion_operator = self._load_yaml(self.timeline_fusion_ops_yaml_path) + + self._dequeue_op_names = [] + self._aten_op_names = [] + self._optimizer_op_names = [] + self._dequeue_op_api_map = {} + self._aten_op_api_map = {} + self._optimizer_op_api_map = {} + self._parse_db() + + @property + def dequeue_op_names(self): + return self._dequeue_op_names + + @property + def aten_op_names(self): + return self._aten_op_names + + @property + def optimizer_op_names(self): + return self._optimizer_op_names + + @property + def dequeue_op_api_map(self): + return self._dequeue_op_api_map + + @property + def aten_op_api_map(self): + return 
self._aten_op_api_map + + @property + def optimizer_op_api_map(self): + return self._optimizer_op_api_map + + def get_fusion_operator_with_unique_id(self, unique_id): + if unique_id == const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID: + logger.warning("The specified unique id: %s is invalid.Please check whether the rule of the unique id " + "exists and modify the rule.", const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + return {} + result_tmp_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(unique_id) + result_op_rule = OpRule(result_tmp_rule) + return result_op_rule.get_final_rules() + + def regenerate_timeline_op_rule_with_unique_id(self, unique_id): + self.fusion_operator.clear() + logger.debug("Program try to regenerate the rule to version %s.", unique_id) + self.fusion_operator = self.get_fusion_operator_with_unique_id(unique_id) + self.regenerate_op_api_map_and_op_names() + + def regenerate_timeline_op_rule_with_version(self, cann_version=None, torch_version=None): + cann_version = cann_version or self.cann_version + torch_version = torch_version or self.torch_version + unique_id = self._get_unique_id_in_supported_version_dict(cann_version=cann_version, + torch_version=torch_version) + self.regenerate_timeline_op_rule_with_unique_id(unique_id) + + def regenerate_op_api_map_and_op_names(self): + self._dequeue_op_names.clear() + self._aten_op_names.clear() + self._optimizer_op_names.clear() + self._dequeue_op_api_map.clear() + self._aten_op_api_map.clear() + self._optimizer_op_api_map.clear() + self._parse_db() + + def _is_version_supported(self, db_content): + """校验当前版本是否被规则库中的版本支持, 保存版本支持信息数组, 按数组或字符串的可变方式保存""" + if db_content is None : + logger.warning( + "The rule library is empty. 
Check the rule library file: %s", + self.timeline_fusion_ops_yaml_path + ) + return False + for rule_dic in db_content: + if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: + continue + cann_version_list = rule_dic.get("cann_version") + torch_version_list = rule_dic.get("torch_version") + if not cann_version_list or not torch_version_list: + continue + supported_version = [cann_version_list, torch_version_list] + + unique_id = rule_dic.get("unique_id") + if unique_id < 0: + logger.warning( + "The unique id: %s of the rule should be a positive integer. " + "Please check and modify the rule configuration in the YAML file: %s.", + unique_id, os.path.normpath(self.timeline_fusion_ops_yaml_path) + ) + self._supported_version_dict[unique_id] = supported_version + + # 若解析timeline规则库的版本支持数组为空, 则存在问题 + if not self._supported_version_dict: + logger.warning( + "The rule library does not contain rules that support the current version. " + "Check the rule library file: %s", + self.timeline_fusion_ops_yaml_path + ) + return False + + # 检验当前版本是否被规则库支持 + is_version_supported = self._is_version_supported_in_supported_version_dict() + if not is_version_supported: + # 若规则库不支持当前版本, 则log警告信息 + logger.warning("Unsupported versions: cann-%s and torch-%s, supported version list of ['cann', 'torch'] " + "is %s", self.cann_version, self.torch_version, self._supported_version_dict.values()) + return is_version_supported + + def _is_version_supported_in_supported_version_dict(self, cann_version=None, torch_version=None): + """校验当前版本是否存在在规则库中的版本支持字典中""" + for _, supported_version in self._supported_version_dict.items(): + if self._is_version_supported_in_version(supported_version, cann_version, torch_version): + return True + return False + + def _get_unique_id_in_supported_version_dict(self, cann_version=None, torch_version=None) -> int: + """校验当前版本是否存在在规则库中的版本支持字典中, 在使用前请检查是否支持该版本""" + for key_unique_id, supported_version in self._supported_version_dict.items(): + if 
self._is_version_supported_in_version(supported_version, cann_version, torch_version): + return key_unique_id + return const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID + + def _is_version_supported_in_version(self, supported_version, cann_version=None, torch_version=None): + """校验当前cann版本和torch版本是否存在在规则库中的版本支持数组的元素中""" + cann_version_list = supported_version[0] + if not isinstance(cann_version_list, list): + cann_version_list = [cann_version_list] + + torch_version_list = supported_version[1] + if not isinstance(torch_version_list, list): + torch_version_list = [torch_version_list] + + cann_version = cann_version or self.cann_version + torch_version = torch_version or self.torch_version + + if (cann_version in cann_version_list) and (torch_version in torch_version_list): + return True + return False + + def _parse_db(self): + """生成输出的规则库""" + self._parse(const.ATEN) + self._parse(const.DEQUEUE) + self._parse(const.OPTIMIZER) + + def _parse(self, mode): + """生成输出的规则库中指定部分, 如aten, Optimizer等""" + op_info = self.fusion_operator.get(mode, []) or [] + for ops in op_info: + for npu_api, op_combined in ops.items(): + if not isinstance(op_combined, list): + self._parse_in_list(mode, op_combined, npu_api) + for _op_combined in op_combined: + self._parse_in_list(mode, _op_combined, npu_api) + + def _parse_in_list(self, mode, op_combined, npu_api): + """生成输出的规则库中具体部分, 如{silu: torch_npu.npu_silu/torch_npu.contrib.module.SiLU}等""" + if not isinstance(op_combined, str): + logger.warning("Error type in yaml: %s", op_combined) + return + mode_str = mode.lower() + getattr(self, f"{mode_str}_op_names", []).extend(op_combined.split("-")) + + new_npu_api = npu_api + pre_npu_api = getattr(self, f"{mode_str}_op_api_map", {}).get(op_combined) + if pre_npu_api: + new_npu_api = f"{pre_npu_api}/{npu_api}" + getattr(self, f"{mode_str}_op_api_map", {})[op_combined] = new_npu_api + logger.debug("Output rule: %s: %s: %s: %s ", mode, op_combined, new_npu_api, op_combined.split("-")) + + def 
_load_yaml(self, file_path): + """生成timeline规则库""" + logger.debug("Try to use the following yaml file as timeline ops rule: %s.", os.path.abspath(file_path)) + # 若文件不存在,则报错, 并返回空字典 + if not os.path.exists(file_path): + logger.warning("Path: '%s' does not exist, please specific existed path of " + "fusion operators yaml file by setting env '%s'", + os.path.abspath(file_path), const.ADVISOR_RULE_PATH) + self.is_empty = True + return {} + + logger.debug("The rule yaml file is successfully found in path: %s", os.path.abspath(file_path)) + + with open(file_path, "rb") as file: + db_content = yaml.safe_load(file) + + if not self._is_version_supported(db_content): + self.is_empty = True + return {} + + logger.debug("The rule library supports the current environment version.") + + # 获取所有版本timeline规则库 + self.timeline_op_rule_handler.set_db_content(db_content) + + # 获取所需版本规则 + unique_id = self._get_unique_id_in_supported_version_dict() + logger.debug("Program is using version %s of the rule.", unique_id) + result_op_rule = self.get_fusion_operator_with_unique_id(unique_id) + if result_op_rule and len(result_op_rule) > 0: + return result_op_rule + + logger.warning( + "Failed to load fusion operators database, skip analyze timeline for affinity api," + " please refer to database yaml %s to customize your yaml.", + self.timeline_fusion_ops_yaml_path + ) + self.is_empty = True + return {} diff --git a/profiler/advisor/common/version_control.py b/profiler/advisor/common/version_control.py new file mode 100644 index 0000000000..e3b3006a80 --- /dev/null +++ b/profiler/advisor/common/version_control.py @@ -0,0 +1,26 @@ +import logging +from typing import List + +logger = logging.getLogger() + + +class VersionControl: + _SUPPORT_VERSIONS = [] + + @classmethod + def is_supported(cls, cann_version: str) -> bool: + """ + Check whether the CANN software version is supported, which can be viewed by executing the following command: + 'cat 
/usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info' + """ + flag = (cls._SUPPORT_VERSIONS.__contains__(cann_version)) + if not flag: + logger.debug("class type is %s, which is not support current CANN version %s", cls.__name__, cann_version) + return flag + + def get_support_version(self) -> List[str]: + """ + Acquire the CANN software version + :return: supported CANN software version + """ + return self._SUPPORT_VERSIONS diff --git a/profiler/advisor/compute_perf_analysis.ipynb b/profiler/advisor/compute_perf_analysis.ipynb deleted file mode 100644 index e7a663130c..0000000000 --- a/profiler/advisor/compute_perf_analysis.ipynb +++ /dev/null @@ -1,366 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-21T09:19:13.937531900Z", - "start_time": "2024-02-21T09:19:13.267899500Z" - } - }, - "outputs": [], - "source": [ - "import os\n", - "import pandas as pd\n", - "\n", - "from advisor_backend.interface import Interface\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 算子调优分析\n", - "## 1. 算子分析的数据准备\n", - "当前算子分析工具支持分析Ascend Pyorch Profiler方式生成的ascend_pt目录\n", - "## 2. 融合算子分析\n", - "当前支持分析模型中存在可融合的小算子,并给出优化建议。\n", - "\n", - "\"更多融合算子信息,请查阅 https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/700alpha003/processormodel/hardwaredesc_0001.html\n", - "\n", - "## 3. 异常性能算子分析\n", - "支持分析模型中性能异常的计算算子" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-22T08:41:17.455567500Z", - "start_time": "2024-02-22T08:41:16.716884800Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: D:\\work\\ascend_pt\\ASCEND_PROFILER_OUTPUT\\kernel_details.csv\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pattern_namepatternlencountduration sum(us)op durations(us)index
18torch_npu.npu_swiglu(Slice, Slice, Swish, Mul)4127.53[21.2, 0.05, 3.14, 3.14][0]
\n", - "
" - ], - "text/plain": [ - " pattern_name pattern len count duration sum(us) op durations(us) index\n", - "18 torch_npu.npu_swiglu (Slice, Slice, Swish, Mul) 4 1 27.53 [21.2, 0.05, 3.14, 3.14] [0]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "The computing time of fusable op is 27.53 ms.\n", - "\n", - "\n", - "Advice 0:\n", - "Replace [Slice, Slice, Swish, Mul] with torch_npu.npu_swiglu. This pattern first happened in: \n", - "/root/torch/module.py\n", - "/root/test/slice.py(116)\n" - ] - } - ], - "source": [ - "# EDIT THE PROFILING DATA PATH\n", - "compute_path = \"[YOUR PATH]\"\n", - "interface = Interface(compute_path)\n", - "data = interface.get_data('compute', 'npu_fused')\n", - "pd.set_option('display.max_columns', None)\n", - "pd.set_option('display.width', 900)\n", - "display(data['data'].iloc[:, :-2])\n", - "print('\\n')\n", - "print(data['bottleneck'])\n", - "print('\\n')\n", - "print(data['advice'])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: D:\\work\\ascend_pt\\ASCEND_PROFILER_OUTPUT\\kernel_details.csv\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Step IdModel IDTask IDStream IDNameTypeAccelerator CoreStart Time(us)Duration(us)Wait Time(us)Block DimMix Block DimInput ShapesInput Data TypesInput FormatsOutput ShapesOutput Data TypesOutput FormatsContext IDaicore_time(us)aic_total_cyclesaic_mac_ratioaic_mac_int8_ratioaic_cube_fopsaic_vector_fopsaiv_time(us)aiv_total_cyclesaiv_vec_fp32_ratioaiv_vec_fp16_ratioaiv_vec_int32_ratioaiv_vec_misc_ratioaiv_cube_fopsaiv_vector_fopssize(MB)throughput(GB/s)color
014294967295126516Slice1SliceAI_VECTOR_CORE169952962310675021.20261.56904,1025INT64FORMAT_ND4,1025INT32FORMAT_NDNaN0.00.00.00.00.00.01.7729508.00.00.00.00620.00.05856.00.0469212.161371RED
414294967295126516Add1AddAI_CORE16995296231067543.14261.56904,1025INT64FORMAT_ND4,1025INT32FORMAT_NDNaN2.328888.00.20.10.10.70.000.00.00.00.00000.00.00.00.04692114.592698RED
\n", - "
" - ], - "text/plain": [ - " Step Id Model ID Task ID Stream ID Name Type Accelerator Core Start Time(us) Duration(us) Wait Time(us) Block Dim Mix Block Dim Input Shapes Input Data Types Input Formats Output Shapes Output Data Types Output Formats Context ID aicore_time(us) aic_total_cycles aic_mac_ratio aic_mac_int8_ratio aic_cube_fops aic_vector_fops aiv_time(us) aiv_total_cycles aiv_vec_fp32_ratio aiv_vec_fp16_ratio aiv_vec_int32_ratio aiv_vec_misc_ratio aiv_cube_fops aiv_vector_fops size(MB) throughput(GB/s) color\n", - "0 1 4294967295 1265 16 Slice1 Slice AI_VECTOR_CORE 1699529623106750 21.20 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 0.0 0.0 0.0 0.0 0.0 0.0 1.77 29508.0 0.0 0.0 0.0062 0.0 0.0 5856.0 0.046921 2.161371 RED\n", - "4 1 4294967295 1265 16 Add1 Add AI_CORE 1699529623106754 3.14 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 2.3 28888.0 0.2 0.1 0.1 0.7 0.00 0.0 0.0 0.0 0.0000 0.0 0.0 0.0 0.046921 14.592698 RED" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 异常性能算子识别\n", - "from advisor_backend.compute_advice.npu_slow_advice import NpuSlowAdvice\n", - "\n", - "npu_slow_advice = NpuSlowAdvice(compute_path)\n", - "data = interface.get_data('compute', 'npu_slow')\n", - "slow_op_data = data[data[\"color\"] == \"RED\"]\n", - "display(slow_op_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(compute_path, \"slow_op.xlsx\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "call stack: \n", - "/root/torch/module.py\n", - "/root/test/slice.py(116)\n" - ] - } - ], - "source": [ - "# 异常性能算子call stack\n", - "call_stack = npu_slow_advice.get_call_stack(data, index_id=0, ts_col=\"Start Time(us)\")\n", - "print(\"call stack: \")\n", - "print(call_stack)" - ] - } 
- ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/profiler/advisor/config/__init__.py b/profiler/advisor/config/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini new file mode 100644 index 0000000000..b8f6703685 --- /dev/null +++ b/profiler/advisor/config/config.ini @@ -0,0 +1,16 @@ +[LOG] +# console_logging_level : DEBUG/INFO/WARNING/ERROR +console_logging_level = INFO +[ANALYSE] +# analysis_result_file : filename of analysis result +analysis_result_file = analysis_result_file.xlsx +# tune_ops_file: filename of tune op name list +tune_ops_file = operator_tuning_file.cfg +[THRESHOLD] +# operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker +operator_bound_ratio = 0.8 +[RULE] +# region : URL of different regions where can download rule yaml file +cn-north-9 = https://cnnorth9-modelarts-sdk.obs.cn-north-9.myhuaweicloud.com/modelarts/solution/ma_advisor_rules/ +cn-southwest-2 = https://cnsouthwest2-modelarts-sdk.obs.cn-southwest-2.myhuaweicloud.com/modelarts/solution/ma_advisor_rules/ +cn-north-7 = https://cnnorth7-modelarts-sdk.obs.cn-north-7.ulanqab.huawei.com/modelarts/solution/ma_advisor_rules/ \ No newline at end of file diff --git a/profiler/advisor/config/config.py b/profiler/advisor/config/config.py new file mode 100644 index 0000000000..183c2ed5a2 --- /dev/null +++ b/profiler/advisor/config/config.py @@ -0,0 +1,103 @@ +""" +advisor config +""" +from profiler.advisor.utils.utils import Timer + +import logging 
+import os +from configparser import ConfigParser + +from profiler.advisor.utils.utils import singleton + +logger = logging.getLogger() + + +@singleton +class Config: + """ + config + """ + # pylint: disable=too-many-instance-attributes + + _CONFIG_DIR_NAME = "config" + _CONFIG_FILE_NAME = "config.ini" + + def __init__(self) -> None: + config = ConfigParser(allow_no_value=True) + self._work_path = os.getcwd() # pwd + self._root_path = os.path.abspath(os.path.join(__file__, "../../")) + config.read(os.path.join(self._root_path, self._CONFIG_DIR_NAME, self._CONFIG_FILE_NAME)) + self.config = config + # ANALYSE + self._analysis_result_file = self._normalize_path(config.get("ANALYSE", "analysis_result_file")) + self._tune_ops_file = os.path.abspath( + os.path.join(self._work_path, f"operator_tuning_file_{Timer().strftime}.cfg")) + + def _normalize_path(self, file) -> str: + if not file.startswith("/"): + file = os.path.join(self._work_path, file) + return os.path.abspath(file) + + @property + def work_path(self) -> str: + """ + get work path + :return: work path + """ + return self._work_path + + @property + def root_path(self) -> str: + """ + get root path + :return: root path + """ + return self._root_path + + def set_config(self, key, value) -> None: + """ + set config value + :param key: config key + :param value: config value + """ + setattr(self, key, value) + + def get_config(self, key) -> str: + """ + get value of config + :param key: config key + :return: config value + """ + try: + return getattr(self, key) + except AttributeError: + return "" + + @property + def analysis_result_file(self) -> str: + """ + get filename of op result file + :return: filename + """ + return self._analysis_result_file + + @property + def tune_ops_file(self) -> str: + """ + get filename of tune op file + :return: filename + """ + return self._tune_ops_file + + @property + def operator_bound_ratio(self) -> float: + """ + operator_bound_ratio + """ + return 
float(self.config.get("THRESHOLD", "operator_bound_ratio")) + + def set_log_path(self, result_file: str, log_path: str = None): + log_path = log_path if log_path is not None else os.path.join(self._work_path, "log") + os.makedirs(log_path, exist_ok=True) + self.config._analysis_result_file = os.path.join(log_path, result_file) + self._analysis_result_file = os.path.join(log_path, result_file) diff --git a/profiler/advisor/dataset/__init__.py b/profiler/advisor/dataset/__init__.py new file mode 100644 index 0000000000..9fac2c8eb3 --- /dev/null +++ b/profiler/advisor/dataset/__init__.py @@ -0,0 +1,6 @@ +# import asight # noqa +# import asight.datasets.graph_dataset +# +# from .graph_dataset import GraphDataset as GraphD +# +# asight.datasets.graph_dataset.GraphDataset = GraphD diff --git a/profiler/advisor/display/__init__.py b/profiler/advisor/display/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/display/html/__init__.py b/profiler/advisor/display/html/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/display/html/render.py b/profiler/advisor/display/html/render.py new file mode 100644 index 0000000000..eb427ffc5a --- /dev/null +++ b/profiler/advisor/display/html/render.py @@ -0,0 +1,44 @@ +import os +import logging +from typing import List, Dict + +from jinja2 import Environment, FileSystemLoader +from profiler.advisor.common import constant + +from profiler.advisor.utils.utils import singleton, safe_write + +logger = logging.getLogger() + + +@singleton +class HTMLRender: + def __init__(self): + self.html = "" + self.render_list: Dict[str, List] = {} + + def render_html(self, template_dir: str = "templates", template_name: str = "main.html", + template_header=constant.DEFAULT_TEMPLATE_HEADER): + self.html = self.render_template("main", template_dir, template_name, render_list=self.render_list, + template_header=template_header) + + def render_template(self, key: str, 
template_dir: str, template_name: str, **kwargs): + if not os.path.isabs(template_dir): + template_dir = os.path.join(os.path.dirname(__file__), template_dir) + + env = Environment(loader=FileSystemLoader(template_dir), + autoescape=True) + template = env.get_template(template_name) + rendered_html = template.render(**kwargs) + if key not in self.render_list: + self.render_list[key] = [] + self.render_list[key].append(rendered_html) + return rendered_html + + def save_to_file(self, save_path: str): + if not save_path.endswith(".html"): + logger.error("Skip save html file because file name must endswith `.html`, " + "but got %s.", os.path.basename(save_path)) + return + + safe_write(self.html, save_path) + logger.info("Save suggestion to %s.", save_path) diff --git a/profiler/advisor/display/html/templates/affinity_api.html b/profiler/advisor/display/html/templates/affinity_api.html new file mode 100644 index 0000000000..f059fbf4c1 --- /dev/null +++ b/profiler/advisor/display/html/templates/affinity_api.html @@ -0,0 +1,50 @@ +{% if result|length > 0 %} +
+

Affinity API Issues

+
+ The analysis results of following affinity APIs are based on runtime env + cann-{{ cann_version }} + and + torch-{{ torch_version }} + +
+ + {% if empty_stacks %} + Suggestion: + These APIs have no code stack. If parameter 'with_stack=False' was set while profiling, please refer to + Ascend PyTorch Profiler to set + 'with_stack=True'. Otherwise, ignore following affinity APIs due to backward broadcast lack of stack. + {% endif %} + + {% for api_name, stacks in result.items() %} + + {% if empty_stacks %} +
{{api_name|safe}}
+ + {% else %} + +
{{api_name|safe}}
+
+ +
+ {% for stack in stacks %} +
No.{{loop.index|safe}} code stack, called {{stack[1]|safe}} times
+ + {% endfor %} +
+
+ {% endif %} + + {% endfor %} + +
+ +
+
+{% endif %} diff --git a/profiler/advisor/display/html/templates/main.html b/profiler/advisor/display/html/templates/main.html new file mode 100644 index 0000000000..1a9392d2b2 --- /dev/null +++ b/profiler/advisor/display/html/templates/main.html @@ -0,0 +1,202 @@ + + + + + + + +
+

Performance Optimization Suggestions

+{% for key, renders in render_list.items() %} + {% if key == 'operator'%} +
+

Profiling Operator Issues

+
+ {% for render in renders %} + {{render|safe}} + {% endfor %} +
+
+ {% else %} +
+

{{ key }}

+
+ {% for render in renders %} + {{render|safe}} + {% endfor %} +
+
+ {% endif %} +{% endfor %} + +
+ + + + + \ No newline at end of file diff --git a/profiler/advisor/display/html/templates/overall_analysis.html b/profiler/advisor/display/html/templates/overall_analysis.html new file mode 100644 index 0000000000..4c859a7bf9 --- /dev/null +++ b/profiler/advisor/display/html/templates/overall_analysis.html @@ -0,0 +1,15 @@ +

Model Profiling Time Distribution

+ + + {% for header in headers %} + + {% endfor %} + + {% for row in rows %} + + {% for element in row %} + + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element }}
\ No newline at end of file diff --git a/profiler/advisor/img/advisor_result.PNG b/profiler/advisor/img/advisor_result.PNG deleted file mode 100644 index a9652f4ca53ff142a5ebd1033075aad54f8f0297..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53557 zcmc$`cUV)~(?9Atwxg)%K?MuO3M$e|L`tl1lqP~8T|uOVNSBgCMN|ZgNGCv4dY2kP zNmQhl&|82gkOT+;Nk~Ha4W9EozVH40?)|;bbN{&ec_2G0Yp*?f*37J#&&=9UH%$$9 zZa=tv!-fqzum5%R)`kt6;2SplZu!R+aZAjbS0>_*-~4YGUfxjJb9i3-Ws}<_lS>;m zlp`eg_cn{aZ+rNcjsJ!XyPDSjertu~-`}vIx8wTNOO}ru7X>t=>=2Y9!A3pAElP(% zeFBJVOzFcP+FLWUGY@D;`SofWKXpzo*F7nxcd}*s`K!lvEuHpF*|qJ))<5%BzWSYz ze-q@aZQ{MSWozDb-EUUj=~ge(x4ft=k4x0rp9{V}(K0HWWZFsI_A0TxT_3bODLKv! zuf-;ZjuhHT;;IW-j!ss$W4DRP{CUKjO9B1o{KO8jtiv9x>SNx4#L#JGeFu1kN_W z*{-R&m~$Wy?*ONO2)BnVqW90XF2gK+)*^68NTLYnx7^f8pBS&xadx|@PI7ip51i6s zPz1{$B^+I0S?vOpTSZuxi1j1u>wt1`r7Keqv~(BhaXf(YQWc)b?{e#+EQ5Gx`hsQUeEb3d$28yw{aK?%kvIVXmxvN^ zP{|mKHhNvL#K+y$kWK*1wiHW^_hYwaCJ{Mv9+(9^Ks;PTg`W|zM#!YtoE66V{xyyh zk#osrZNh(Pj<%U0|Lp~c|2}-?5K#q=69&WOq?c;XX$3De+`=+mfX)!Lg##7lb2{)x zA4gfZR$c>7bb`p^Uy(x?J&}?(ZG&^oL>^ULD2o9#EiJi$ya_j%U2f9%>2-~AH7+hv zYB+Mua%j?hTWWZ*?Vj_-Nv}P0bECzULwtO5o(73N6Wb7`ddEfId(7H|y=x(C!&xGK z0o5s5Q2@MEfOE`9a9+;VS-7BCvSS~53c(rJYWG0+2#c#YO|+VEql@GZ`7bAopkP;L zFE>>_-VR&q7$Gs?YtOv-^NMs@un122A{$2HiTK$@lX`RNvCmC#WW+)5`b?S%d^pK< z>9vhb5WoswhL#?@^adA0x zq<*$cxKP$bC5!lYk-d-#7nIRwK*AY18Hlr4!t#7ZZ#|;Ud$g+km!#h68~zt%{!{zl$v`?v#DHtj z9BgcS!z|q=<>lo`a6UqG&)IizRMmoF2BXLOAi#GIb)k$PUzT?y)GM{@hm`zd5bFWZ z$`J^3HHOb$d^@pbXx2uUVta7rQ*m*Jgb&Syoq+HgyC!+o^dw@KC2S#BdKP3N$XVo( zFrU`6@5P`xi7PNKVKGtJPl&$e$d!#tj>h58Oy%e={OfB|d7z`9N5V5~4+j66Q_^gh z8NZx2h(xVLp;Y8LMsSV?Og)8jT0@=6xWyW)aP9y~%fqwwS-$TF0#PJnA8Il%K@oiF zO<;l%_@vC4w4k}(_jB(D5&GQxG2!drh9+>7xs<_xU^@yAV^y}?yNbl$RgFaf@)CEKZ^mXJ6$l_s~#V7+oITFLMdSo1jc&?sp2-EUxO|q8H)} zkNAdpZ%hH%eQNf8cUM@Pk*-3y3N&d8r>ZY%WI|&El?*lCnTFLmqnp(8zJ4XurD=|a z)PbU&b=7!yhEYx`>v%$h+LvN@W!(ysWlB!-nb)9rwImW|KFgGL@UXk$WY96sn|-CQ6rVR~*l9$c16#L2^Cnvp 
zqydR`kg%#z1#(WL9cef+zzbM2$nQ(kY5IU_cK|!?yOCHxRfXW8h7vypz7-d?b+JbIHjHaD<}MZ?3~e}RS5jn8jULUYZ8au zre|NBYh=G$R1_Y8Hyul*QiuBrO{jSf?te)H3ZHM2isp811v-`SrtDBL6#4J8Vm(y_ zL!a|ES;*nY&J5q&m33=V(Yff`QL8=PO zsT=68q}kuj?_+cvJa6mMQ_Cd3nFQurOQ&+jXSCFV*4O55)1+nyxUCw;2KKCl6l=g1 z6(&JR(42Zzm=8}Xq)ErDZ6CU;gY=?g#Uc%)EO1<5(50{S2%ffCLzG~QDNApir5h6m zxxibQNxah9+ecz`3nsLT-02zu!K!g|NWwj%Zd8VI3K>zFyV5EU5JimyTVEkRMElW? zch^NP;A>maw`Uudp=D#NCz9cxz~$j&b(AV0aI@JBB&QOK_37wCKOuD?>zC>}4;OL5 zgiFi99&ZZ|-t*U{Cx>)v7vC6X*eB%3@4wGV+W6pYqbSqWD|Clg0YHA)ZC>`frc~u6 z%+9s2;0^6eMUlSKX4fcR(823Q|y?BSCS6F zcidAfI}a;3594@GI(oX$-fxo{*@!moLH82(FF{RA&sz#`fY zjkN^Zzt+8VKwE%#4?gNrTUC-@Fj)08_vQOed<|O3tFZP?u=ykXEle6%_{=ib9LSvn ztgE_GkZ^G@nUv-NR}_sb>pqxTWCEDK;d|Cr$^syvl_UGm6MzGtLOg#!;<&P~t_{<4 z+)2QhMlZtmaXMm;EA#Em$A6#(sADm4&MHNZOWhMRgBA84W$^ruD;vKI zC%Ik4@||97>wxbMX%ta*1?K=xT(h2A8@ixs)(K6us~Q>Gz1!dlld{pZMB- zvo8s^D4(|iGcLd&;8Pc!+YP^C$2WJJ91O%I5Tnrm-vc1c6+np7mm}$ zZOXNxi_8$)e07u%G4H2MVxnGXuS z)Q)Tv+dQ!ffatH;$u0QA<@Cf>q0s0!@ezE*jv*H(?X#CdH;dMV=cnlM6WWrePSP{h z&ge8{JBb;*Xs;SSEDsUt>iHK<*p~7!w~y!v@+WL{hRIQ!fhInu1&IA#W3|Is_%kRP zGlVd=bJkixJ~h;hnTa_$rx;^6l0r^5zmL$WVM1KYF}NAu(3ky)VF=iz7vLGs-J%5+ z?oD@i)9w{O_mM}=K+c~wTuM#Op*Tn9_z2H+R!zMQq9rsA$;UOCbd??VWgeL8w&@1b z66KMvUd6V`Q1=IK|Gc#AeDvltiamSVVW{Unre`0Mq?FUo{DfGUI0}6z*?dAVkYt z^){F(5{0QNtk^lAABD;KQfl|&_9Ta`e5$_EXl)ziv4XxTy2?~^;@2u zq-_#U#^X409nmfUFht=ewBQYmW+)>5kYp!Zm%9feh=7Yj$+!VOe!S>tD?m?i89uvA zLhRR9hOcfbibn~brVn9+o$zTZUW3VOC-9}NXn&k?qs^?-u2;J3nd8KXI%!#O%U=Zp zAL)kM0Bc<^UC zrFzKRfV@+ZZo(L17QX6$RpXW-2G^o;$wt>oJZ(o`_#ofT6LxJPDU_xwk@2ClKq#V_9zfhyr?9fc3el@maaXf-pXAu8E@+-Kq4bMs+-pIVdZ?Y!W|b%S!( zvQ{{CtsKG%tqrdnIW&oEoDyQq+Q)%C(cwJbI0*EwS!DABaFC07*}%UP$6&tZFNkI6YYA)_Y?&|R3Z9}`JS?5(DUo(Gqc#Xg9{6{ES>4&OfAXO`h18*<@XZN)SB_JoQ4$m!mS62u zQT7lh83+qoA&lw`juo@|2n_+%c(d^#eGy;uGJNG=oExkvC3i715yS!dm($(urgfp} zm$3wI5z|S;Y`fSug63IQ0`x)$!PR0*k|XJ58Y`{osX>!ne)NL>Z7VY`3Z?Del8qD4 
zAV86D3?)c4Do_W3AS_Ndk#B;|xy;ZHU;B7n)cZnzwx*^shm0^?V`}9n`gUiB4Hus=H<^gu#%1zcmuV()=FA_&iKfRib?5%AD$@3+2e1ac}c=N(SJS~gdnVa-a9wE>n7P7SV4Dl(cW8Onp zD^SZFUf4LiBj5N-Ze2Y(Og1BFUZH8LlS1h^u zFNe-(c6p{AE!FhT1CtOn>MEi3s0W?qF~3n*!DPfzL3xL4d=X>(dSiF-sN>)n$e=rJ zOW@fvHEu0t*ZNJ@mkAk&6)d$MF8*ETA8^o<@Lz`MXNAxN014%=qI3spfdo*PO%>fS-YkT!REkgz1x`8c+TM5JosCHXaxZDSP!{lqCP12uMVGs-l_C~*mAOp!d6Rc>gWcL&)m^g zeK~QMjeF3)`dbe-s7$V5C5w53x_6qmJ2x#eQ>&#P$M3||4mul4!@XoR1j%qta`z#u zKBwI*&23kKYT$AsyG~Eh&P%v=?&__bheUhAdh6C}9z^)WG z;O6z(Fdl-_>I(*J44CLahnrWQk5h^?Cv_>Wo(LZSJM|d4Uvg+|_ z?2oTELz#kmjOSG=cbji}3j+XG>dGJJtz^WX z?5tCoaK4*pxxYN!CW|>PUi|_NuTOK#ae%PPe+b$5)VdDNm1vgiV&E(efa=EGXc@;T zu+Jr|ZGeShbO`3f1rS?mNvq83)i&nPar~mPoKRG{X74Tft`ss977;Sg%nz}&-k~^) zz?aE+J`hJdBO;+PsTzSboUP^}OD-@bA)Ub#c@i^Edg6Idfa`1Equ~*%WFU00ymdAY zwvcxt{+h^2-4NBDg&F|5gfJ*90wh@(wi4~f=msXY7Or0Et@F#@2^G{W>1b+_YBaK* zBg$|$P}la-zep&zg-tOs`@hlqgSxu9Y8%hl`3O&Y8L5R4E@38$#}6cTmVu7@n{D>R zr-16-x{zkG4~Iu+9%4BL-g#hJQXl1he$T|4#20x_DfN3fPX2dtk2pqI`^_~y9&+!6 zcA#DNH8>huBDUf~`Y62H$t`ORd=q;j4b+ce?p4s&z2DhN9Se2p@}<;|ALjW_ z4-Z#O*bQtSHtX()I~3j+IC1#xiPq!~Pzy#LLuOp9s!>tFZh~`f$oXjk?>28#KfkA> z*35fiV4rRQ$E6%P&1T9WoTulxh2xp(MH&cuK{O3aM$~w{^ZyB@E6HJnCChE_wOCvL zgjMJ`FC0@Add00BapTW<0vfxivg=`v^Pl4BUe|0?U^QOk3Srn{Zp@;DC2i6|lQ=xj zKFaP2$lylxWkc%?Bna^=srx9@;O5HMVd+%Edq>b6S;}?YvZJ>chOo6OoF0JeTkB;m zn$GQyI9}Gc@aKgelM?hsdEp(b9usnZX5%%(;H#+4nVox})uFFrV|P|=pG$SijB>;X z#o;hsw@Ln_27Iq(GL}zLM!Nk7r+GQ46UKW5L{3j|nlAQ7q;hQuZEoRcqr)PHg2GST z$enA-9*XG?3TGBMl?>VSxm261Ls66aM-QgI@2mUrTj+Ebs!5VkjEYWa`}T*`LjLiY zrr4~jV%X}rF~(_FqugT%yUm6n-;z{qsg9Vwu=mt8bgTy^(4`~4Z=&x`m5j@})~LJ< zpM&dqO)c&dd(V|lx^CU5YNsl62$V^lMi)(yhgNU8iTU}szMBdfB{X1vz-?y?tp)s} zp<3;|;;6A=z`KNF@go8PGr>YfaEa>}_w1K40ls2I9@xpwTzeNTLZK1>=}w6NtRJ>_ zg!F=fQm)ZEM&zv)d-GTDossRLvBQjoNgnBi^u$Js3m+RAyknA3JdE?Z9Zv@N^T`O< zY8H8I)#kFwjl04f5kpd2bHZmvUYgen7T)IFbNzGuG3@^ogR1^?0)SJ@?_Zpu8v)k5 zIMdd+WAy1V5@3O!h?3@vPd1yi5%+UO3Y<|o5(0yAC)HMcoip?=W&3&j8}Eb1_JPXW zAlV1crhs2&k*YhF)Fba}9rY8+!RuuP1Ef9k3 
z<%rNZ;OvV-Wwlv^K0?~+z1A1IWYr8}_3v_qta-J~#gW}9Aa3z7XjaORJThL&O!xb( zZy%0^?lO`*(BvlHjhfZ^aKsau|Ks&XISC+D{Vl8GbCyJk+fNFU&x%67isyW7ea?-< zYy&J_$zW=gG0o~C*0T$nG+i@oLRxg~T0z0&txjR4Sl$n~y22Bzph&dbv3f=*>{&&$ zdjW7pupdmJXc>191c6fRNNX!Irb^wYp=D9voh23{(PbAZFciNetP zlny!)Rkk<+X?(pKhsHul)#h-Gy+5h?)WaAG))K3+Xb&x|s$#J9Xio6v+lnEazieGOT}+ zoKplX-4kNv^c{<<>-Jjrt2uyXm}sbR$m={JY+lT%X9y5~MGUcuqC?R8em=#94M%6U z9;cYynW5N-6$5&F6wu#;9eegXH!*P^hfe{) z&2LaMy`(Nn*Z=Ce)5P65Q-g1+!C{WF?| z#y#N<>lBc?%(yHWPeDj;Q_p!4AS_m!CC$2o3xYlpXb`oXu#EIVDI?kBmSz!tHy*cD}#6W-nIqW zA2_Y$RpYS+*OUEA+WGU~RiC`og9)!~!W?ca_{12|;9zUUd7=rJpr+|ad6kmm;d;({ z;)4b}mU{2>2{D;j;&_R&o&k8JP&${JDmsRkN*bHWPR(;TvpH|M2#%$hhqObzUGmBJ zBIS;F_`~=HP9E_DYVC$xW7cZ$H6NY?WBH9}WC~C)KV`v$hRwGV6<-S`^C~==EbS@8 zWO;Sov;NRjRytQ;%0qk9kB{b;g&R+mvt`xFTrLqR+w0Qxn3qD>U+uDp84xI?ksjsd z*t4`;-Ay(lxB4ny0~HrVdAP4FDlz0UcWaH5cp@@hMxL#KH@MQxJH6y{M25V{2i2V` z;`pZo0V96jc57VPm*`r8x2r)B-~BF;?9rF1KYfdRo! zk#+D~@FL-(5gjGg6K4_FIem7-5u|EhW+vwLl0t%JRKvlpckfd{WiV^U@I~}&iIDwm z!Ou@uJS_wUu(6iET+tx7?KkxF8U1)y=_!mG+RDju`+$&;S@H3}D{a5uiNj7pYoRj&$4Z>pG6 zJ+^BY&vk9W94@OjVwRJ6WnfhI={9ssfluYk)K~Xp`Yagu$HBEAv2$yh(4NJ4g;{-}bc?vC&^3L3jg%(?{M+hJY`M_4auV ztP7RrR{GL%b~*xw zdkZZ`8m8CSUMOHXkEB{EinVX#Yc_kJ?eVv(%lE`?BqXW$xQ5+^;Z6_F*RI<<1G9{7 zK%*@66)(G++dg%c-2EY~kMS0BdvSR4q_e10)@#t7{E3WEhwUQrE@76Fr)&>ggfuz` zR{;b_r^j1iX&ZRiLzs{GGY52veUNcUB-m4qcC=Cda_|H|z4bXEtAek3FSF+p#u^bk zuNNzyXQeKuX>d8KO`r!DY%hD)r<+BOkhw}aa6)bZ7<0Y$cxww|ia4Z3ThT&&=q3rH zu9N7cIXPsrF@Vq?X+=_o7py@!EKs6yb_-%88h&c|OiPDdmbOujtOopGOx*aUPNZJz zq~rs^6Is#29&%+omoMKDl~jOC-F+(lcX%)4&x zG|W>n^$I$JzOBSUotDEq_&^=^<1@5`Le{DJlJAeyt%4~h&G31n356q-ScJ9OmS|NZ zv24yv&&>qRbE!%{>+|58X7fX2qT`hYk3B<*jau_=14m@hw+UxyZ&&R9B2ah*o<+lB zBjzHO%cnFM7+fm40`H(BaFdEytEKxVkhwYy*hE*tOvjyM3WC?LkuGquHdj{P^@@5i zWakdTp4E`^j#Vb4(QPiV;&n1M_d+U&98{(-J52CN6gMH71Rv)+Yj+OcJteJc0V`z;K-i@xvPk&sG{lmpQ8zsQLUzzTD z(|=Xf<4M$?iNvG9#spX{_`T}h;m-iT&}UK(e5>lV?sxrsao(}rv*7e(HL$%>)vD*} z+3u{5Q`LSm-0B5Bu?@0s=GzZLE5^Km^lW=rv}$nYb(`+&<$R8XC=Ot1a8?oN)~+GC 
zYbx)fqX5*trJZ%L8<6qW{Iko{RNjcAOv5Xga7Py@AOG$0Q#x`PvsEUJuVk{B=j1!U zg=~RooAIq^?i%)t{O9IJI|)M?O95f$eOd73{X?yGj$@%z?L#bZUl~T=U7uX!JI%!% zQFfELD)*hGu+T}byN@STK)>I2q{^n*^;Mq{XGfc4RG>Z!M-J_Etskh_$3zG2678b< zxqM1jxJOKd)s+8%-iN}1~4e|sl3_w0X9vHP8Y9OxIf+pa!fPx39C9Ne{=0{ z>L9=E(BYm>e{?1F-nlWbU=Ra*zTI`lYlFyKlebpt*F0UHw?>jp+#fZQlRa7qMDsFK zfq(B7bXm8u>T`Qri(=!+-?Q2P5N`Hoev~ueB!h>!_j-RDUmvnof~h2%y?=D;E%IQL!GltUbC49fB@kiTm+krW!yis4@}%AmECnx{`O70=hIQGdEfGJ zb4F|IXM}Y*aAu%-ezG|z7EbLzJ$&j_#ghvi&t3aqUgiP?$b+4O9U0%P`_;Q^3$U^Z{c7!qHa&+ zb%uO0+QJoE3kJLH-_BMR4cBd2Q3an{V7>m(RhOFM5!IzH5UbR(v7;;W?P!XiJ_SS! zzqO>;@|hr9li&oLU?aH>@Kk|f;|v+mVYb~rzU@XgD5W#XP7drwTJV;;3ai+yP#k<- zTWibJk4^=XDsdVSg<$%vDjhpBow(xa0?YGpv9e>W9x5=5Rs$S>WVRpNhSa)_$?YjmwJZQJ zT9Y$q@y6bs<>0~1oJGLi8EOgc6rAC#)YvO6wDdXK(xI%MfYbD*bOY3FPQzrRP8#=k7ZHldu_i=ucKR7sB++OG;q9FMXxE}E4l$|{JCLBDW;4&#``xI= z;~l6snPjsi?aQ7QclPo7mV+ETR>bCLj7_``-?*bsEU=jv+GH2NO=WI-Lht>#CVte; zBVNxFcm^Jk<6WrxP8J3c^UnQWxX)YDa8i}>X9v=B>dZnV?6Fe?pIs1W6i)+Q(Q=Ci zcDgrJ!tIcs)q@O;G!W&@wsyy+Py6?q$G?2$d>*}Z5r)|5edS>9xv$}lwou9MM7fpJIv#p-4JUlBfj5u1e zyk%p4DcQfXv*p+TDC*ds$fGOwF{(-Rca4MVo}UDlAb%TTc2Zmk6GsTNTNZ#8Xl})f z_TeI|LoV2Ja4Xa*CuAn80N+$Gsh!2~D2_QW%U0Mspk_3oCpdGO$C)aLq84)`odqq{pKiE%M6*)m8^cE42I^=4S*zlRq#%anyEn_ zIyjYI&NFm1kh}3-i;;IT!K@P|hsyPvAbvm8GF^jVJ!8@d9a?g7R(E9%oZfy_qhCXF+{Iy2VX*Ck#wTcJlYg%S7(E;`x)q6czi|5zFS_3mN@c&iPue4SYW+0e487qOezpU!NAiz$O6j0O9Cp7e!h|}9@9zDd+NvrloQ)Hr&28EC$lZ0Lk?Rj49$@{S#+P3)|Fhi(@BP0)4NxRV8N2o& zoT5W)fAIX@A)W;x)>Fw1=~|xRAhrzax$D-*KclbTg63H8;wLPKxe(&{Z>9H!m|gz` zjrpHcfNP55=w!#Y@U<84H+Z;^VW*^M*x6a`z;rKaLgNHrPJc2sLC3omaOg~TK$>(dE81b!8){aH~6MHe08hC zz}0|JcWqn8*R(=IJJ|sGY6%3|2_4s8eZe&SC`IxN*X+>_IxhTrmomCEiU;yYNbM^peLV=OqBf+jW2*L8}<1Xrpnc|bW8|5&R4=CLh0twHF( z!A=W5*M*v87`vVx0pM>~zdZ1Ox{hk8u8xh?`o*hT$!4E@@~`|svJLv6|MQ+TuR}v_ zlp0Uwl7CevhLY<>*aVaNUga~IC8%QBLS!X+}AfxH82snxQGeV@&01^Uyt%uKb`7!Q=RO23Z47c>j2pNa&4hhYjyFzPC z_c3Z6TCbpFM;)1bWKT=cb#NwjJ~}t_{)E=hNEwFj6o}%zvk1Xp55I%^yDn&Tp(Z8& z7N{NZVg)hl6hInmr)K53g+;{sa-tWdlk&V$tZgZjo#j_L^DL*JK?t|>Y_x}RHT1WX 
zMxfRpL1a{~HNj~ue*#83roAflGpPZ-c5aD!{`;=^TT4a~=rkFkpdfj=y=I~L8m1o6 zj&=LCmY1%uuu@v@y0V##k<_v#i-}gQFK06ViT*xbAaAWL;{cvdt=$oUGbqSTwwp2> z2{Vk*3}4cr>F@XpKU#D>M|J6>*I(6rW==SIvmt3IswhjXG_%&?NOIG<~*dRRY!{g`T=sBD22XINo>LzQ<^ygT?9GkL4;SG!6Mew1qS z_gjjO_oFEXt3OWbpo$E0hC7Gcbh=+iOS{rN9kwy#Ft+l~(7LDbS|!^bkoxBiNM+q8!0lOx>qX z$d3s58GgjAOUd;_SA6hf(A;z2J`wA+!!mtQ)7!~;!_qVYFokK<@5%8=&~eaai$2r*X!{^mAXc6K7v&6E# z#M8giTNg6?vdYZvhE9Ok`MQmffp3)cPeZov+@fw%ESIUQ$GnGuEcw&lmOc%=XTAiV z^9adleU-07T6p$j+zC5UEr5kQ{BFZbmu=kay6|Q~-@k0)V9MP((Qqp0^HsanX2OzE z%a`{(TY4|9uxDccW%@hb^|)eJwK+cw?Q!HT)H3cQiYMO0G5eUrDm2ToVYE!!>wc$> z7~Euk!+>6;UC^^;rDO4jNA5`GWpeheOPbgY;1?C>byiEzvtoIAE1wt8j8KNuisO_+ z7k#~X>OYxS%waIiI`Y#zRoe_?Q6&lf z2%_SPDR)3^FVo=-CY&+9&rHlnb{L8Z2!1h2qzB!HA^h;-l*0>GL-M)+ zitM4=oQH-R-Fw&22SJXqJKveQVp&xa%N+x^9Vu8;3MdzluKJ@zht$4n5<4cds^(By z%eBHRTi{t5hfQ#iWV7?Ix8;@}2!~9Pd?;|Sg+kKRz+Ij)WPBz4o7vuw#ucHl<2(~m zo!2*9-!RHIFZgYB&VIaga_2ky)H+4+lCjoy-w&D8IO(N@Y3?lB(Jv^veR*@7wk7WN z;w{Sh-U?&B4~DcqghNW6v{HprwM=+aoX`s$=+%Yt7p)ejhtc-gD7H`)$}-~JMDytl zxgvch`|M$B(wKi)s@JPD{l6EetmX}5Gi4=NM(dApqY7K&Ehrdv?UfF^=FzJ;eoJWHGg8s+EKRtb}4q+ z1c?oNXTjfi!a%+mB8tBj@=?QiDG>o%WG#H2m5oU;b%lx2b-A5&$G?*6my?&{!yqfJ zQ)h#>C|vGZh~0(%jp)5YT+m@)&DN6ma~9r z?WwgDf^HKK?iiN`=ui9*?N_=6F}Hq?rB{VpiiU0%AW-qv!j-HCo0WgAg`eii#RV1B zZltE=j)o<>z)mO~!z=;9J4Tca>3N|1v+77q6$cw43rC7RZY_+U3#?}+@<8X<0y52y zA{IAxR~CH{r>n?4mnn(5li-HC!{-&;p64okR3Z&`AC6@=((m^!{UEuBxwyB1b>sGn zA%r(}!gNJBRz?$+spwqn`;r>ibKs&wOiB;b$4bjpj`ZXUrDoN+Qq>&@pJ2cjf>Q^# zb)yE@uH17PPyuH0!6w90j>YNK}FE}-b9*yjt%I`nNoN`DnnS`o(&&(ukI=NMn(_S*%%|ozJT1W9FU1b35ISg)s6$zSL)s z&DxHVf?nZ1Kq}4UiMNdtYcT_9-hy@l>J_Lq5jI z!Pn_^9p{AK(t9+L?=xg~M!kXc1!FpMW8T5hTbRF+HL#kG;Hkh8)|E{YuFq*p{jgTY zv4Hm==pVxA8^sk;qx|+h?!`Hyy_%03=4WC7lSyjf49Y1RK27YUx;ozTK*i+$s`sp!9e8 zH!Vb&RH($AWIreFEUjXB{K{uZ_7y+cA*6i639#_$Y&WWO)!{+QhJ}$ebFD)WRUno_ zSp7tqONrA&1d?#3r+jYWAe3R~s5tKKqtIaMqqm~1O@T^7BZyC-oB2rnyC!Llt4d{c z&qHPm&+ux^W(?Oo(_4(zH|lUt$tNsJ{HiJ$e`2Yo{3c-Sez|xr<1WpxDIp7uBz*E~ zK~&rpXHswDy;4OUF4;{~O{ESCCt 
zu_pkTl7B(Y3@rFfSzi&55q9&4t}pJKK2iS(6-Rs366R%`^2S;p>c-rWam&tPi9FL= z?`Yy(*!5|{c+5(c*wIT=efZbXFc2GPANi*p^V&OlO9p4(#Z7ruFq zp4e%obevvoL3c)o2D{(PJ7j9YFV7gN3M#k#{hSw&pqANfK9gtJ*Sk>O9|HkhC^^UQ zm1ct(!vncum%s&rw?~J~#RhjcBkT<76LzjU>-7<0r?>ek`#H-kBcYDFWCFSytOK{) zWx3U%+kZMJWMGI}Q)W_H_t&UB0o}?ks0<|elSctHBd^-$1$0PDisf_?0`mQzUc%!Cw1^cA&9yKaS%tsUa@^*P4E9;j8$#JnLuD4 zYj0H#SUX8mI0F*wb{Zrs-X7XKM9)P}o(=BNJ44~h+->X(v(-$(rpL z1W$IOerKq-drjx}xj;5n=Du+v#RK81?|V}~NY@lH-l{zQr{{JN?k&#nDw&Y}37xd8 zmlWu;fUkJS%fKStH3UtnZPIGV^eY_AR=47p*pCmpoWpn@p_LJP9 zH-Obmf2Z)DZ~K1?sgg9|cdh>}THVmJ>F-GDnV+JLIl%d&zjv^mVWaLTu~*u}9~1n& zGx?LsXa1Sdoq>P6x9>C^d?B-V_^m>^6P!5XLWBtSLPkEc_ z_tCO)mvi6HxTVG(yrrL|=aYur{E+#a^TdeYl{h&MdK zk#b5pdqm3N-zCOqb_J9BH&$tmFENTd?7op(4PO`BhPx5 z20080um8^4vE*a_w(cI+k90IaI)={Ym^zdGQ%xbpWR$FX_$wU;^xuoK9QL`tZs$%( zNYgD$$7FHETO|5USJLI#yZ^y))}OD+-%~w;tNOT$fb}ce`(Mb^$&)*g-@R$UTUcCD z^6P^4V9N#kY!@nhK0ZxP8OQg_F8}!EXO;hum>vIxV;ed`l-^%1lhb@!Cv#DgVW_@S z;m1%JY~Qb;Xpb)=}dC--KPr>sS$0~CiAAR5aH_r5vzS!-Jsl7XHn7qr~ zcQKZ1=FSpt33ocf6sI8%{2B@zaoJ2xBSO6BPHpm3dwiK8ukpC$awSDvv8P;JSNG}{ zT{V~9%S)wQD%du7e?xVN@PO31L0dmYnU6N+6Nmo6Bi37_f2;f5UA+kYt29UH_tpI$ zYyMjwRql?XB0d1e-TJFi=jsp1$4k}!rRCf4tHj^QMB&g7-~8Uc?-sD%H0SkAP~s%~ zjCaW7{d)hv!#%sjMY@>(Ldl7zKsetYUZ=l$;NQD^JAOAXusVF{A0R?Z@28jl*bwpg z|L7eC;=BW;CxdX1L80g~cm3k|;0xIN0PZ+SJGcX*fOIkJ{!cy926r+C0v(gN81eXD z_4^ooVDQSX(xlPm9SiS&y+2{mc?Rp686zZU*rSLWQDH$M4=L(+Wi2|r!D`>W2W-Yx~Cvituqg!=FNHXQG6 zfzoO{{Lp{zi~pmy^Q*@VVkm(aJNPs_-Fl=-qtQd>Z+0~8Cgb<&1fN&1x63=fBz7bqZd*v^Dop$ z(+C)Y$IOJ=CeyxCx^w@wT^c*?3B7-7?2>VnX^GA&Xs|qk0SnY>Iq^nj19TM7OZaGh zQ>Vg2(o#Gx-G}W-$TTuSGr1Gj9O%7Z$L}={Q8)D!(FnY}WlGW#;Z#YqA1}DQ(!;J$ zLyv#BP|+l@v(g$?;Kw5k1t_gvaN8o7s8YP03{6XnzE)S#sSo>KI{r;nmCZU;4lzmh zWRx=pq;U^M6tI z-ce1Z-@ow8IHO}l2N94GE1>k=iKvKDMWhE1LWh8%_l$}nHPSng-aCX2N((*oKtd4$ zLAsO#l0bNmqx1dUd)Ix}y6au{zH8n5nRRk<&a?Nk%V+QXdAf@3ln5gKTQQuwb8n7Xnx^9N6mjO!R2V<(uv9I3~5lrCTj3G?iKL3wkGL+v^ z#eeh8$_+Eh`fhjz_<=eNE3)a%=s@_-=*1M$cKGVmr^BhhxBS09TcW7uo2HmV{H(e( 
zcnAIOFn<3N`&pd=Zc_4P!-v-Tc=U*CuGJiS_Tg-$$L;yu3#yraYL&;&M^l1zF189{ zA$VtrYeFo#i_h$Nf4YE!?^gi1x@V(jS1Nw}Rv>@mnlyIC?w*sEf|pwRy6O)t8Opic z4`TbXAH}GdvFQ}id^2Mw_CLGgeG`?a$x=21c``Z@;F}}86a!{_w?NY_>>ew(HV}dO7lJ?efSGAQ>3FHfK4Oo0&Vo`7C&O5qWkTt>^`*Q%smG&K1SQrW5i? zOUXSjZDilZzs0QY4r7BKacCt2enRQeupY`L_1l1h2_qXBt{8GW>P6mhE@D~9YX&`z z@gb*7H{(U_j;Pu|mxG;hU&pf8`guugE$LDhh zO|JIMXc!r8c9iRJM>x+6L&cizuX&l^h`xPD_Oj_j_1gW3kK`GM$xpYK5GsOraZLoV z%DnZ>&H34^H9;S=Jnmwgu;aSwk$ODs(HdCmp;&4^V{ejzg-CN zk~@vn>}_-+tP-1OnvVS1$IiWF8+C!-A7xl{TIeX_&_0sB`29li^(I|UrLkfO%h zre_SaD{L`?2>OrYRi2~WGsej#B^SF&e_DNMC!6WITMpU&8 znWKtfnc-nq2-OW3-T2Q1Tz1~hcKg%J+ z%)j~0`3FB%YbG4|ZR_6)J>PCPIyxT1?TWTuu15a5)`maqh`p53-WJ4|=0fw@bQ?wg zc#vWJx!h{TD)PU(7Es37$B%Qm?#Jm%BUb4h7<61zGFyRb?jv39foCV$`S)d~<8y-v z4eJSo_I_1(%i-HyJ~?D3`g$#OgW>W+GcMjI@P6Nxz<^;&oHS2E(xt4ALS5aRmgy4X z0HXK9#s&QlIslr|%p=??)HN@a1&(-T;aB}Cs(<}?$s+(f`JrMFng4@k0CDO6M1lTY z)Bn%G?DtTcOeU{?`LPv%`wb1M{dd83TJD?2rVsyp(LXnYR#HuuH)a24vHSPT==-NJ zQCacwe@!Zw_J8C)^PeL7Cn1}sOn>|*S9y`Jb6DFV(7Lx`4o690B9s3!I&krRzLCSB zKWX~Ta;Z}!KyLQq=0I7T{$Dx8|LN(7|H@DPPt0EY|9SWKlQQDMZcM^`tM~?|I#~So z3Hn=MytdY~udxED9x1iSWELa7Rc2hL&X@;np2f%u1eLNPdh9Qw%kQ3L66o&Twbu;W z*w^T*9-Y>O!*JNr`c|p-1_3Zyz)^F_$>_&WNF5+|*iObpR%Q=bZ*KRyg+LV{OGshYLnv$(zAKqdQ&J3sw%% z#XW}+Xd$9Y;)f*NxSk41M=@uM4W!E@S(xHlEZhevEe_oKi~ zI>~8DOH#Glw0Fg2WqmY!p+J%S-Z#jlvOBV_?Z3)?dk1HiZB!Z1misO_D#Zy*07LL9i3-_a3 z?8aEOlVqVW3zOlUJcimp)0w1E9r16@I{g}R%1L^S znV1TqLN($VDSCu`jo#?wKUw3gR(l!$MV?Jk1rpi$XvVv`!N&B8^I7P}M`i}lf|YR2W}P(2aA6W zuuCFnrUkmCS1R+BEzxTm&v^~36nlmF3{HgMH zZYBMoRmHDpXG*ra^dudba(IT7lVsBfUPd+pB@i;9o?^CCi@oA^n}qdHEw0FVG^JZq zX%pTgbn++Kq`Jp`HLryo%q zh04Oum`eT?xS82;I2>$%osksg5B&UO;PCWpo5ym{`mVi!R(I?iquW7y*Y;?>UC%e8 zohgqWB9{nO3zm*q3U4@4X-wIOr)^k3{dlY>Nqg_>B7N-p|Ju#F0?>AT|4c@_U_eHS zdm-OWmSx>o&_6F({Q9o@H_zIm^tYD6KfTi5VV3|xyEO72 zNliXx@ol&|Cc@!qqqF@{LkfRt#-tXAxbN~_(@;N`{%Ci39rD5agA&lYK$ZkXr|%t6 zY6HVT+B#+J3Ad@2@d*0s;*+(hFojF4JT2r`SR5C;>nUr2&v%R)_{jfCVq&`y17&kC zX4#o8=6p%#{L>uFoy@a2dH$HEd=K^l8VoG^HO(A#Lz*a}ZTX^P$zi>x{uO}Ut+1cu 
zoFKl!D6!GfX~tNsR(Kf6!t>=wf>o7&ZEodk^psB4LoRR)B=K{(n|vdQWo_HNjq-W) zbT9ev*IA05u4Ua1gY^jwfPe4057{XvP;|-j!0`Q&tVGcqNZihlX#ZLfj-cGUiFJ5P|i4)b}pn_WMx@zi(ti&iI22K0|TwQXQ|_&QCF-)ITCIs-Q7JC z8)kADpz)wXP(a|2&3c{$xik7CNXtb0E!f1eIl*W5*pKzci& z*WqMl%U$k&{_Pjf-vCp0?g%ZcXYW+pw{rrZzFB`6Ptp* z9q#DK$-j^>pBn_eQhvA)vjeX_7z_MjALpf6pMyz3=`g( z+;w{~r>RTT1K$O(dkbJ{DmdPXk!yLZ+b)Tiz^-Pc`LWBYu+KXr6Rz}ExxKSw+`P{cllKK(2C;oA{0(SLo{cgrW^k!98Wtm-K3lt8iCV{35&CsNNphRtyD6u0$g}KZSpLJsX(; zTaXHngEkdh;x}q$E?s9I1E_VL5Zs9a z#zeJYEaP!Bh4hm*+H~ow?u3QN18Ll>xT9bChRG^D zYH{Vj>0=E*F4dDL3y}Yqa4n9CRRFU6=cy*sUNG+UQ}v7#0v}#5yP~m(q4dtDoM^XP zf3Gp;cFkBv>^m)9fVav-<^J6jlgus^a~36o+0rp*#@Ksja|fRVN z1@}QQRtwi_Zlik-r#|FBds3?UQ!4LfLi+{ndK(0$^|@li?I*ZXqED=>O#;!#!rL3$ zeNm-|6y!!i4`#z5W&G5I2I6x-tHPbnYeZhjvZSU5j8`ZQpp1;X=RV(hN0e;8EtebK z9!OX7sD;t))Vh79xrK?jwa1BSeChP* zh@z){OXWtN7yq1{9M8WtX9HmBN>v47xo7^s3ZMBmQ~EA!$U<@Yz zmDEg%--OjX$!1hg%tOK^BqN)tJbifF@KUoglpw6iy=IA7)Mq$#Kl4eMtJ6@5LSFls z`F+~_eRnz9R>h|DYRtB@M)%^0uu9{7X#vH4RP=69rMs)z1@ju|-Rb0^!>LSL#-gEl z6jgq#X_8kwZwCD+-dZl8ba1kcvkrRQ(FUO3vP`MpSTE!u=2;%qFD?E$)mP~|d;48% zSuf7FzfTQ~oUo?80>bwPFZt7kEs_+J%%yj)_9THr1LB9h`%Jq3&AdDTqjSwQW4Za> zjb(%1Vk}jX6*4`gtTx=O-5)Xq&l<*9qgq?83*6SwMs9TDbvxmkTQ(3blyRIS!o)Bo zq-x%UE6fGNA;p0eBwKtr*Va%R3pb07>khw*PZZBnU`6v@`QbcwPXUw-+&fn5B5 zsY>`?g~|W(mw%$TT9`_ic_D%UcaL}Lt8|?WnEh>Hu(YxDH=*JlV;i^li1YiZ?;t6j ze@Bqx<$Q(QCwbeuXbr_hLUrdQC#;tW#&4uI%#nI0#mweDIK@rqTh{Zcn<=WvVJi~s>k zEAj7ez^<(1EleBC*0%C_{n+a@>@vBVD$Opb(bT+&UODGf)954)O7CQZTO|oy#%$?lu)D z>rMz4sL}fZ#G<}0l=TOKZzoa6N@y!|_moPKV*Rtdpe!W1BrUa^9-+x9Ya%r$grk)u z#AA7~ax$0#eEiDf1jO?;M2ncTdtX^9M4Ic4WiXwvBaT(C?{72}wnBfnhVQY}BoC07 zmIgStnSYJ15_=6Yl`w(ef-uTs|6aNq|71fQ8_Yrf-1J*_R#zscuqhPcE|26CjiA$Yqs#nJJsBHh5To_Ap@I z^|1ZMa?}WMV+z~$NCT6~1ro=t*K|;kb7P<1^H1_Ec$Kg3iq;>#dy>>*+Yoal-7D#b zke$R6!B3&oW`Ob_Uw`@Bd#!cMOpwn-c5%#5FjcpF`@qVGG21xS4}@5RVxNqSR7ZQ; z>Jg|Dga=eM{kA5!&9i11YA1ncsL30-Wb}9%1<>2{?gec18G|d``)licK-%NB0L`sC zF!KFqfN`ViD)PLM!s-Zy($mkLqRPAhQ1PBC3NLS<^qEzmq!DB9cdY%Y?p`xCx{|AR 
za$++5L@C!mF)Q(&!5OM6O;7S4n&4l4L9_OQ-oKBCoCo44r1nXl$ez!HwW1r5vHaqc zo{luKdl_vtq&Jq&;MMOZ!?fyfs9}>JlJ%-8W^pIQ#%g~1wvoTNH69X3PZdzA5OA!o zZzZv49IVym{BiPi;fmD9v(DrZ@haEuEqc6>ktlz%1g42vwSU*qn^UQ90~JTbgrZZ8 zJTQFYp&2i0K7a=uCQ84B=-fJE7l`< z(%pN=u=d`N2hQ|Zfd;ncB<(UdZfN=Q-Qt@_Z(9w`fG<%1tl-5ROFAPZZ((D< zzsL1OX^E2mt*h-&XhL6xeA^Loy?@cse&4#dkoZ4x{2QQrIi)s%3U#q(e@CsNJeITP z7RMHyc3FBRIJ+{+#1gaU+n}@3;M9$`Kb}r7l3m|XbjN1SP`<@-z9d{g8AeZ;_|L+6 zt33jqE^LL zLkHV7_fKqx-r2u6|6=K}IZ4}H-cYz98Qe!v#!l_s1BPnjturJh+f2}fgsQz?M`Mug zl86Q%Ap2$Ou=%*j6;pCJ94P2QZdfnxSFAD0O0ZxgI3HU`bRr!M90I{xAmazXE=nSCpv~57KZt1&k%zPRpz~q<)LgEOhV~ z{58q9F1@sHc6|&*Wma^&Eh1@2{Afu`l9lCDd^rWi<)+y!h4d!=L8%*86?@kn$1G+Y zelj3yLZlzm*oOJb1;rE*w|2N;qcvY*$M}q42tt<)thWcPmip4dDk-Um9KQ_l`PJl) z7rv(su>A@OJh7n%Y|zUt#w5_!`-!BjEIP}xl_-o)SV8hB{V_i&!BjDrb#f@EZ?tN= zzlrnoz~|@Zi(dcO_<&tQsi*hen)KUEEr$H5`Q~~exKbz#vuwTc%E1y`M`eDh_2o^& z$ru{`fu~<0m`oU>^eqxU7q`X%ac0!j3w~Lr9;()=s_Hf|QuAyJaHX~!<>?U4hJTYN?$2(VR@7ijWTTVf2ypiHuUZGo!U_Dt_ zB=16+VF9{G>_OWHpikY6gtUbQ=mE~VtY5qR+5E?7!C(eSy<5z z<^Oc$71gb$>XtYTpec8aYbzfBsY_POHQleWA+A?SzSZJ{$9#JL1m_PXU`z(sL!I5c zY1}%Zu7B&3>Dh6LrDbWliZ30oHGRZWH};>uEQ-17%RshR2t4-dD=wx`S7}eBxJ~;( zGc7_z$bs&Rl;ziAT$0RInk1LmwMap$*`ON3C9kR3x`?RP(gA77g)VMDL8_}rDe zdSws{A094RdtH71a)7Vi#jw;+Q8Bo?S|ntSaPcZh_6IxD5KBMk@ucsr?YC1(UeYlD zZ5Z0+#eILf`(0(S0?arPWi9hg(kRQr6}Q*vsG_n*!-O~7CUIoG&6(%YhzosA_BMJ< zZ9)VJtx(`=Rocj@XJBcQWb27T01})`vNGU`db=#a2oq!3tU})sm(IUaX@22W(VHga zsvRYSBQ9Hjr^vKWX_(OBetSUUrK9mq;r@%bfdh`$8(K7hj8G3FasCd#1x_llx7r)$ z&RQZj)SdMvx|n0wAE>T*8S;3jm|RN9x93=^q>a7qpYeEj!CL=RXIb(OG-Q%n1aZw` zZ2+ZgMpioNb`Wfpf^{A05ML0<%G`HhyLdyS3#&BIxtEscP(SLKgAVsQ=3$#w zkRYMls0mv=QZE~_rw)Ne{xy9D7{iurecIvoWWo%2{=<mLU28J~9t64;1{zE>tT)+>NBXt_v% z{?+=;NYzV7pAgZcQgwG%C4kASO8dgJ#GO%IlJ(~@k+n<=tMz<_Y*M|WF^SvcL(Vb; z{bMT;{vOu_w| z0RDR39&0vLOOJ_s*rM+znBN@FCSNwhpON{LXy7p3%PFnu<#SQedKr;{BVJ zqar;Ber7xE?8q@kByfPY^3u018OgW|q}(l;%C_IC zdQqcepwGLHCR^K;HgiFJjJ6%6i4t_Uq9!5u%5|p8ybH% zr71vorG4n|$@%=df{NyCl|Mn97t5axUjKV_Y4?x=(tm4k|4G#~3VeAvc^stK2di4$;H>zj} 
z2Bn?5ZC*C?_Y^-}oyf<{3k>$NY@hbKzK#lz){Nutd#)=@H5nLTvC|cU=k$So!f*d9 z{#R?po|?9dB8PTivv0GKd|H0O$T)Y>IOn`W(if?WR|6=kx5F*_LXAO*L!9MI3R>RI zT0Ee;JJ{J-7Vufy)!3=8cB1fenk&tbXZg%~xRp@0dNyU-n4+$w9gaLJ#;cyydSzTr zZEZec0e6^=t^M0n{`T3=s{&cBWPKTizE!S-@r6mk z)VX=XCkq!TD%JQxt+Cb=pdaBMZC~e11b-Onn_h{VeG?&R6!_xVg?Jags|nj95ZPJcFn$;~3zYCz#9!W6&IbWqwf&U7*$Lv^2_^ULjRdwAKqalW zT~|y;X_RMdNsRpPVmXdnVkrB-1#+RqrQYTpz#7B?rWdD=9QG7v7V6*qgD5P5T(A{7 zKX>>9Gthe))Vhc6{H6>9yZ_Dp1u8aWF5RoTyJ7)^&!0ZF_?_^g!RF0!P%(9KRNJu%_!?yDy+XQEa;Q{;KoZr&R}z5jA0FPG4?2oP4!!LKz0 z^b}jd=|x7B`-LwhpV?Ih71@db0K<8s27Zx9hoW}?ET8l1&pBO7pA^-{&t!j*Drf+54EtL1 zNZ?o(Yz|dSC}!T_YWvFp5Dd`_ZaVDEJ#QqJY_d^haG}{tXf@#oH>N4ZQB-qKuVJ9o zGxB!fHM2qj#=-c?>*e1!yt<6a0&8}N)|yw4G~XcLL4oE4DWKEgAAwBD0;#g2Z0j{u zHMa+vC|M&dg@j@%(GcORWAi4Ccb4;8Ean1y38}TG{s4;oHOXHluu^kRzu>Xo($Mx& z@2y@P6Y9-Mov|JT)jY~Mt{gyxG+ZWI%=T346`xzad|RtyO@kfvH)+gD@fMW@^I*+n z5AS^oNA2jB!;}1{VVVQHt+bWNz+IAv=|E2GQN!}ii??yQ)TVszCcCuAY0g$t0VzbZ zLG%8QTKTinKR2|H4e-=`MgiVxHXyU8nv(`r_5LMZJFA-?B0P-7PgDQcpVHy>Q}DjH zi+`;lm-Rx*^ewZ(-@ZFhU2Q-ppe*Z-yI(JN=aIR2D^HR3A2y$p#_{5BDu4GlJI7bU z=m_smgM)6wTTg5uHj3+S-?h=Z(zN1%z5l{r0%ji5NepZppKe!yIF{Apj!0I(c_Z%- z{D#axRh9~dpREkecj<4WYwjK!3Fp`tv zVdgb0Bn?-(gmTME>34Hh-5Sk31dxMam!c|9*ZWgmk&x^bGGJhhjoVy4qgEW*$YMuf%@uJD7m` zTQqz_JpHTBs93yzzitxIk?bHN9sBK2!WzA=iuFuX@GpL+?62jq;x)q?p`PV$9!K6R zb(prBliZ8zt38^2Olx7zQZ$o7aDRT;4LKEnaBp%Zv>1yKC3<;>hG=4Lf~C60N<;82 z@85vwTp-4ZP3+M;c#yY@tnmJAoF4q_uw9q|kJUJ*F}0F?^A1*T7A?CH&tTsNN3kTMgKa7|hlIxD(zcllXYsscs5u(1%-GX)SBjUx=#Dz>xlq~Eg|1~G32s~sJL;+ceTgLe1-8td^F z4f0rr{`TDd{^s$PDe+PvpR}&oVO9pUZER1)3O}}tY$!Q;7KZpHX+h7l=bTjVov468 z_TPuZ&qA-rMCVJ*~9qso@nHX1&Q4V=f~VyXO`;A$2l* z#~Tg{TN~L{xGIGw#e1r3*Z=vrRT@vYxE}|e*jJXJEGT(W(cvB`s=@GCcCI&{W$YA? zuh)XEgSl%0hrQGj+`RhAJt9}dtwtC{LkHQoq#11lvFYQILhwe8tvOj)?!t>;5ggGX zK-=r!yr?A*CZBJ9N;$L~O!HO@>^}f`AN3?$IyNjg`f#Qj2bQUS1A@b-=OQA(JIQM! 
z{uU00H|`P==}`&C+i}>w%{BVXe$0edi$*QAlWst*7el^{`T8xmT63S87hu=Y)M%Fa zh5>7$Dw}$DggrDom0}0nuWWn#^<6%%4qv4}a#_(zP+@|Er zz*-E=&~avsn8$5S=%UooWe!>fI<0BXA&jS@@KINZV-d5+FQ*#WZz@sFgBLespAsAf z>&qjSlyneX^dzy@fkVVLw@=x;70)y;FF&V~RftCGDUDjO;RoLA-xQL;OBl@z5m3Tb zQ&~cox_>~yj&_+mdh-b#!o4y#Uo*k&7|rP^5NF@nl7?WTvjq|P+4*bgmhmeKmTQVL zPYnpOJBiQW=0Py&pyfxd=?{H)$lQN2BeS>K4*Nuxu*6*SRA9a-e=qR_1k@4G$kIV9 zU1odfu^}}`tgx#MUh1^ommJhoEb<@la=F^h<6Bel!kgH1jUC+nX+pSvwCfpWTtD06 zGdGBjOIq1D+1B5Qv&h@&El#Y&e4A=DgzTwUkx80xjc##nLoHj(__va}K7Xofcvn%e zbQy*ZmDsP3;|nZ2Xv{8|!_%iDOLitdYHJ)tvfvWG;4gtV5*!2f-H#%7p*(a&ZrmNY zU{!!>IN!X9<8u0YUkzW&v1a{Vuau*xZ30dqvG)v7MGZz3zy?Qp7#s&y#{eG zRZ}2=M{@W<#u+a--(tph%IPb&n$2=a zF1vh0Pr6nw+E-hn2oWD}g%ixrga+I{E{XBP#x3#1|iCfbpH0%lg>Xd8RRqA$S&u+ee z{QFl0e$Ns_ml=ppZuqLzDrjpi6lv@0Z|KP;UxIjQtDEpq zn6TzROqVocN$^TE*N}M>ARp6k8MPLG7glmH-vTC9j-bBe(xil_OYw@NEC~=jDtSYwOU9UCxdp zcu~4kmjFAo$CkP5%j7q==qtTm05@LXnBoMUz!;6nu${4>YxsPwf%A{x zALC8BYBO9Gu&{vi+)m;krddOpT(-)Ilr+dIg}GWAK=}r=M;l3!S6?vxmOWj_EQZ|F zH3JgyutzU->>gpi%)Fi0dsNy@ zWd;&7Q^epqtgYXEW684aw6FK2(c%a(XA!#Qxjso)d&M`xvBtg!*1GLx{_TN1q^&jX zj_!2>f9K+=5F=1wXCHJeaV_OTxdu$zk!$4Ochms9z9DE?XCw7Y*WV;oE>w8uLcUmh z^JS85pLCAW1wo;}k~3?zM)C#x29V?AO7d~;1Y!7Cb|#3lyj$xbw0nW}7#l;{p5{u? 
zR;AX_mHwpN800NBm`uOSAs+!Fdh@f-Z&$AD^09-|ycIP|OQw$AD2dqlSeq^!X@Y`} z(x`-saU2u44C`z{a(+$5C(bzu3Q;bPSlC>ePD>=?h8>nvn;9(venJwpW%|KN6-!_1 z&UC#$HX&pkEgyRvaZoI#j+d0AuwUf`9uoqi;mK=R)dM=dTkukNKAvL}1md9RmCUud zX6<%aQns$Po56}jzW~62-!B?1DzY|9>e0vg*<=+rFlJBf?84XlF5M%;QR&Y<@m#!I zWv7}WwK1PxmM=$|Q~xsMMc*m``*s2+r3pr)$l9)Z7l8;+fu+IaN4{JrkXsfIDNdd> zfm`l9^Vt^R)eJ;ES`x}tZXlKLwhFeJS5LpqJg^*zK{p%_&Z>-dYV3z;?PzuIw$@+# zgUoNeYPDuy8jEkqUmY9iXDu6OU11B0XF-TcPl$(l8JZu-BcjB8{$RBoRSkd?k77m1 zVJ_7O_BS7$l_9GR^-oWEte+L!>ar}B)XHWd%*j#r_#fvYCBs!pf?dpWwd|V)Z0D3( z75z%}s2!r(7}d$GbQ+sl$m%>x#G6PTD~#4v!i};_hOfLb%L5LgLn}o7>;|3yqlb83TnIh?kRoX z5-57G^O8ltfCrC2QK6&v0E|Ys7!PG=EfAg}1-Ovf^eLR|M&m?7{C0IfHg$4|zU^aM zQE_J%U<=wln7$exD%ZJeFixs$#tV%i=e>6^hVJHbrQcj0_#9riXv%uVH{w-3lQ@cN zUWVzI5@6Jszltd(Dsb6`PEAO@YsP;S@mZ~N+aYW8xqz0{rUxx7d!4)e_(yz)uZp)e zOX~m1Q_{;H%cV`Odq?~@A^eB^Tul@~y1y0GL~E;+mrk#}QNk-VXnokH(6#{wku0~i ziA6$wTJJ>BzaKf_qas-~UDzH-y3$Wl_v3JOK*52Oz3%ZD7mx`Ox=wp=LuQ_QYjATd zu*qkfa(Yyp^+WlE2NvEoZ`Aq(n)dOyTp$Wsb1J*J7IV{^LKeAlz7Rji+_X zr+^va(eB2dn8D4Dv#g>=ovqK=uwU@TX{>hzaS3Hhi>1m*bv5A|fopy`5eb?oRhrjT zlNr*E_F7^=)nDV%t8cCeA7uw{0%s4oJTQ!;$~&v;Dpu~v%Y)(rNIyes) zpx7R!qs!i4e$Y7-Q77PbT@CMIrM2_zZ1YlP6r@}u)sb~5bQV5Zxe!)YV4H9jSr8U* z&r+A&-i+jCY(A=rzEHDwtt7Jf?{`Ph+%8J2^jPw!ob8AorNq(ENOjaplU$i?t}$PxvCD6!+4AkwtEwl;?u`z#TaMPLLW>P#lhb8+Uk7eadat%B zr$}n)q@)m=+5P=`J(GNM$m3!}n9Z?<`O1 z)rX{z9o|!WKG_K1&~I`{L|Q+?z8@HQrw8LQa$C>q)F-R{1r~P}6CKcTeDF%v4EiC& z`q^@IYvZ^idsU56_%1AvCoTskGs9_&vkDI)k9j7wu93w&aJy3PACyEcZOh-AYK@!_ zefm@#iTV3B_wump)eR7c;FD_G^?s_=@tg@kW*uI-Xesf#f7(Fn`3-g()hC==Cg6&Nfc209bGhLYGMclrPN&mrPldBG z)%{jGJS12%7cGroXIuAtO(oV_R+v6>)+css48imiVj+sV2fh-xn)HN{2gZ5jObcN( z$CU->=jO>|${4Z?N*SIkkgKV>#x8xbs-!zhKRrwx=7*pJzlfm!97W$!iS#*zb?cwA2Mm|i{4VGZEqD+Zqn6AAq=)&E@_+r zI~zQGhaGKV&+L_-1&1p`S|KRf^4Rljp-7K_*fj+6gVr*BalB10#baoP$d=e3)q$T- z3|Y=cCS*sJc!(WsjXD*#xycl|*c|3ml#$<}i~5B(!$h_K5kINtY3?6Ix9;8=Tx21+ zAZta>AM5+iZhJGWO_&+L$`~co-^pQ?mIcV0CZ1zu7iOfnPn=E)T-6kNbagi$*GH+3 
zh;ac#ooqg0gWx9NWgrW1oYKiNVz6466Zf**g+j`^B@d3>cli)M2qe$)SmK>#HCoiZ zjNU|rn+>@h;T4i5YfRNoYB^>Z^e_029%(AHyT10$f!-P9ACMm4f8w?Le9S;_vhocv zCQuXeS>*ldR#$;Af`7#L38n;z>f`S<2%)&F0UYZQ2{L8IElEX2t?vVPTFY$@3RACImi@U-m?A?QBa)pk=jXy4qWBwkgyoO&ztf zb&%J_^LS9VTBgu0RI| zr%}69gVQ6J9KyH-)^^4W?KBRTpIRu@2YT$uEz_E{3}3_7@Bo2Zjaf^8kd(5K5_kP> zi#{)FaM#-BAs}S06&;$B!Fd`4{1uOb^>d84nt~5@`io|>L`UxDNzs~W0^fx1{x+Jx zh5f8GhKWZmb@b%bUMsd*Orl6iwU;{0hz>jIrEn$1m0EX=F>KT|!LQ;R`!tzz zgo_h>ftcIY(hEBdG;ZNm1=zmRoD?PR}Xjqi%-$Wawz{rg+yu;&P# zS#Mum+uuCbjIV8{J*`$Ln_8l~vGhxY{RARK%4v>SgO*+p{P$nXrbFtQ`=r zdta@zeZMEG9*POCMgL<|>Eyciij0=Od4jnmnyIF&4bIvnwNZssC01ogoW#fTUAf9@ zzU3~UN6cICe~-EMpg%@T(Tc<#<0D(I>lwg&oxfUURJURcQ9x4d<--E)RuM7PuE(W07eI<&u!>Iid*w$bGu~iZR)E5IFs-M0?`9 z5|ZV?uwhx2sFCMdwl`s3PG|Xs1j4~a4Yf>?s=dTKzYrA2p(28K1~eQ#N?Sj+KK0Ar zoXP&1_7Lni*Wh5)vkMrOxCcxK z-kXy+`Z9ek85acf+=1ptJ|$jwu)1&dNM-QL5Nt(M@jP-fH#F&Y+G$JS@!oTQ)MnEC zrcT42pvX>%#TS+jH(!hQYAEC`5si6SnRUS+ltpCH$*Tje-ybBibcukZake4uV8=84 z(j?ojvO2%a)~QqKifG4Etxk# ze**>&`GI!YuPKpW4fzZo3kJmXoZF2#N7rfVh2?R6)WnmzPK`6C985$)#67%XWMH;B znAv4hyWH#@a_^RIoRRigi))5C+jB%P?o&T` zi0lU(k#vK2Tfx8$7mRj%l;1e+ET1lpvyz&7X&VMLlvVq>- z-a3%ee0CS(DSw~@OeL8<9V*{Kt6pQX3YFZ;?$ zWiqM=n$t7z`8la8r2?!8Xh(gC+5Xk!YSRt)83M$DF#P;D*>yPqjLDju^rY^+w}MPm zh-#~yp<(VigoT(tFji)3be~R=i?_F*V?}Ad6MeGE5@+I%w3D^>xIfQpy_85cMhNR}wZS zlKSgXpnDSN#b;!3$z#X%bq>a3H?YGIvRB0XHqB|jB(NhGH7c?HK=g3F5PY!@)U^FO zhA~;zO9qQI9wk=$j7MTM{Dt(}47*|)N}f1&hKGox3o&@g(4xzN z?ye@mq+;7143613TaDxih1DCrs}Kp8n#2JK>LhHuPF;x?b`IlrP&Gp7sR9t7h7#6xutDo@Ku z8{#ja5QiBu1II7j!Lb8OF9`8<=eMmg()wF;e3TW1^fmlKrP7NraP<*j#$Q;md31y! z1s&Clc8F=qpYw}&#l1*5kMZ;LKUHU#8`E6;+P_-UT0z5Sg~9Oe3!l%{a$A9;0RTQ&eJNS;&! 
zC02{HKHEH%U*LEMt)^v|9xHFHMKP`o#LzQ1NJ;9g2j4^^5r3O}|7qZ%5B160B=<(x zuy+r+3X>a$dFx6|Zjf3^U^fpSH_9p{bqD!4q~Hgua*3nR=y={H4VL=dw z3)&YTBUBZ;ERL7DlocrhU$+N{LhQSn{-!${V{+B$DazCCt<_q1Pw)jdD`*U8R7wjD zHWFJdaa1+l91&G)*+?FG4zS>U$~jMB`W-wi295{bKg!|t7sO&0_$q%Ph)ltlj|Qrl zB!3>0WLz>j$}c_sh-Ey6Jw$3FKECrIjd;Dgo2yi+hhVGe80s+G@6oXOeh1fSy(3++ z^}TYGW*@mKPjKKF2>MwxY1?{Ko&qeNEV503gXeL{*KNhfVX3&7f%T4IT*2&>2p<_8 zO)vqPTzX#Fc*v8Ps0F<);hEffkgBjaXk()V2pKoc2te}3D5Gt~^DJreB&7kPB`%x(BEk4jOB zF#C9#%z1hjS+@t+X8WMM7{wQOt0+Y5jSga zE`hMgawh<@O44N{_Zj972GXuGDTu>TuMu9`%J+CWUDn4Es>KxJDCtkGjss_ zur*CV1p~~W-NDDfe3PRTxX#2$itMfACJ8PLP3i2F$ z2eys15IzDi z@^Zi9?Ehp+hUKuOy+CBTTRx{4%4?$S%&yJx}hquYkj%*{IO>c6gY2p(fK`^co@{og`KeDN%Zl^cq4<2qmZpks4_UgeF8l zO6UoMkZ>0I`;T+(9p}Tn;TkrEc^CSHlDBE>hgh6Ob?mgPY zES0cXK5svHMvYq^vAR1YnB3hMY%8NkFcD$;CTOL4u_?G~@9?MBp#l_TjQqGf=JM!I zt9}XLo#WNWdaMlQIvW%{KW`p?ijYwmuA7po#U~f1&sXr=v4J?q7kzdnU$O~} znxA=k-X9?;asasbjZ-s=M@49TZ?C})x z=&?rk&3b4#~1 z!Eu#;DnnO3GWO%!d8Y3wzv!M@PhUsRij-NH^2Vlb_=S@stK%z_o-#CwI0K*&TscHn}a&FdX;oK6}bNO6|8 z<+7Tv86T)Vb-OtAq5IF@tM5-A|2wB))Xo2OdojQy;OGB;VAY@J09x$-yV+jeXl-rgAoQJDEEd3!fc&-ERV+?9Y-Dv-rDZ?))2*|RlTm&+Jn0<1UP+c6d zZL_DvzJ2V?9}bh?Z^_!crjFd0eX<$=wFv5+FF0tNn?2I#^Q%#(lI(1oT*u2Gct4y* zSxU^;di!fTdksoBD0R1`4wKu5f>NGBgzLQt8!{NEx$hJ!AzM9C77=Qh@1=U#2$`4P zTHI+nv)brYwi#bexZw-%syvfcFSov@_%S>SNLu=mXyLjQKRlRnCB=uh*yw&|I@34p zlBwf#yZtVsMb@yhW0BE&3Xqic+PcgELb|8Wt=IyPvAmBkD%3KK^e(p+3<*MiUHOx0 zKX?#GdrKR9pI6UJ(EL0oxmG!C8U>J@w2wr4u?KIbs2ichv`x@b0Cx(?EHdhXH~e-C zUtiI8lfKGlpQr!fm!Wi#kH^JkFB1DNuLgB~dpFeZv-8G4t#10l4_$cxjUpRD|4ZdZ z^^PnG`pV1dt?p}vR?N3r{d!MC3etV`Lw0=mh+=}DWl~EH+i5%Xye+?}zR*=9eRx~N zC)NJ>$hIr$(l*Ri*Q2(X5Y^v_90f_dqCPDm{ z93Z#F29DKDuT~Oh(B*?Y@S*t)B|qx>mYk3d8WEbBkZ^FH>uj=W+2baFfmVnf0w!-= zTit;5Hz1wvk{_`L6gn#XHRPvHw2!Q{NWDr0QG}0}ZtJ$aE_={RCnh>_ity*@Z-v%_ z?h^~oS?X&3j>hNcUCd2E^2nI6UbzR=_p$S%fcjG<5e%aEzBNbP6JsKe!q73BR*9|u z0e&6G_^M@`udQNt^4K|r%VdUa!M8L5v8z%W(0P#p$w!lHWv%Bq! 
z3d1Hz$p~<5<<*}rtr~w*w6FR z_ZxonJmb*X%xM>n9rSeVS&_vT9rG^EC2dVjU2Z(3>OPlc;l^7HsqUvCHGu{CpvzUH z{=VpkCofq~$w-9T5W*KNs)t1~WA}LtPw?o-hD4I;^-^4#OQqY^O+IVrwLY?s|H(XR z_N_k;xA(xoPODoQpu)G!tLg)ESN4*He{27{&e~w2vx(~&dR&T}*V+)i{r%rjQGqRS zT?oG?YvNF!mP(xF8=RLyv9-DhKp5`d)Epv}6j477H-90)CN)231A&rtknfgDKfPSj)ZD46fP`T zMX-F~24Z(<1;kgcjB>uJNr*2CZpk9Dhd)z`ydrzUUF!#Knwj7{hX!QhDwjLob&gs? zMC%vJ@m-{aoiO()L(l#*ADFm#8^5o7ydFqhszZA5fa+~l@y8Qsa)?CwtIVp|7XYh} z#;hB+dW3m)w40JmKt=vxFuA$^CgS6VZO2-D**JTfXro5Xw_?i--+%K($_h#nNe6Ma z7`yICqu<5gzW~y`-(KD& zV3Gf);a0=r&G$5}c~T2)t#bWeg%r_|?ITykM_;i1(f#TXM;Dpa=P1^2?hl8I%Q_Cd zPvDt-h2GGti0n-L_VY+5@(D$*c z|77hRu$ZNG#wHFZ?n?nBbxc8*5yX>&im$`$gVHR=wrA~6Z^`ubZMzxTb6w$6`#yG+ ztd(H@!~Fak5@<1La$^yqy{|mw(1C)#^YB&)I_OZ4LX(I#$?93|l;RpW%&~C%~q@5n6r~_`$l!O5du#IRtu{rJB zbd{1#)V0aAXU^YP_frTRppjeFXK5nkBT80I++Y5%eWOuAbwN^ZKRBdyp)yOtV|aUX zI#E6ig0D{g7bfBykQr8yDR3TlgU`Y7pF6+tTSQjPwbZw`V%dvi4fzW+icn&NhcDZ& zC%$v`@9cGz+a!_+X3s|5!uw7&tN{ANY;pg#4907NM8$hH8EeL)iNOZqAq&O^mab8+ z#dN>4#&!|{3{0!`eY}EvG)=Ngk?g|FeI9kP#b5+zPHM2{Oy8p?-}Cxl=Xp#p3jURFD@3A8^aDUS(KuZEp~2HuaK6If zG$B}R-~~@r!Zky}UfU_?sG(k)J-+e1T=Sj2i_!{r6&+Vxr6dhj8?vIs#pvzD>Pa0R z!e`iug?B7yIgxF7D^>~kAK1G)#X)zUjCj_t`c$#S*$qY&tg7J2=X!FC)<0Yu&oh9f z@eS34 z)4tNoWy}Yttz<4n90^EW79Bnw8Acfk5BpkEby~~I<{VUitTeCA+t!oTXS!JdCAYHYB?S#uF$%Rz9Qt9Q zG~h9o5v(TKMlg}^{11o-d+bPP58A?H!sbwPN!ew6XI%5Hq zW_}^IS8D9+aK;7y(NjgXGC&A|bcFeS&aVB9>@-rl$;UD=vjeky#7t9o6{o}x zD0P;|4zGRg7pOXJ0ctW$9~rxM6zbFg(kTqq{4loagbP~F8r0ZXNfRqp%sc5))`MXg zJ#{t}TQn~m+p0lN>-!uv^HO8O8_F-P;lf+>&%??$cxDTK+r4KO^(f`tA75xYOK3_y zydew!qFsk_#61mGtYZ|!BP2|6nC;vVZ_1{c&2q@=GCMZ|iCG zzF*kR;hxO2xVG?j+pp?OruDb~-Yoh>{%AX`&nP|btC|5Gde%{Dj59pTIinVrpX4d@ z(u%n@Cp%s|y^@;xWPZLfbo=r09Kz;+ytD**n(t4GECa{VxeQ-@~%Dqc~kP5E2oHGg>Tf-|M7IM%VCvlrJy%{E1sUlxDpnpfyD!)30*w; z!MnHl%r$Tbfi&hbWtN|OYtuo=j=LWh8hwba?NGUs=|lnAQLrJSBq&d3F!TG5HB6pa zO&Z8U^wv+N$4$4rYg*v5&^1&-1H`Q2C($q-x_o}|9=zr8Rzqo{(ix`%2T5B1Mtgx6 zLP{|B({|0kLMt{u$Ue>eJiv8!+ydrqn-5jx(_KDVbo;5hy19223-YafUpS4!w3a#q zO-p1EO&teRaOv^}NxZt6#z}a!u@CLwA;X 
z3pP`LZBFJZ1Kn&2Mq(y%HFjnX|GuS)fluZ~hx>AiH2{2avPOnF-Zis8F2Jnei}M{b zBYvN0{e3`Fo=b(#dsJ>M8*7Zqy*PpuCk!nAs5oE|WQ&w;E!7C!u6!jMK4 zfR#t`!f=9o)-DcH01CjCO{4Dr94;XJ zdmW0NrvO|@1L0z5{%l+*r1V>-0vs&i>tW_~paYnk>+-qIRVz7>QgA@`nwq(_x$?J} zJ+Rt}&o_@7xqe+?I>rR5orZwxom+aS_ZdFm5c;PfJ^xAumDlVND(2y{@=578#4~ls z+uo%h+|{bj@3J^;#i?j>Rt{5tP@~BBf{MN`JDn z|5!c@EY>_#f`ONA%@UR5AMb96Zs7E1o6|>I<9Fd%W=`TIdhHI#L;BAwtlnz}=#V|@ z+Ajv~Vf?%l{idzA;+4;@vNt|{hEW6pT+b9$iCI#-#WP0jzw;QkFW?5>nwAB^UURw< zqYX~9XhJmmdMG|ROU#GTf*lP{jHq4f19$*b*ug8@%*$Hr>?sBrokF(~|3F0spilM_K*k)&Ycv^)ULxsG^CGlEj++ z07@qB;P;8b<7cPVa>=HnC1WAKo^kni52k=0t0VuN6!-s$IL-fo_^!t@3k|wadL6Os z!R}by8RmeRS9+4dhD`Rg9iu*3fJN=n@hPoe8V<6scG$BU!jZVynTWWqyaG0*Vk(M} z<8WMIN#N+HbQ08*M;1XAPU@9ImC@{!OEc03zncHj8wvN2ML3k9A33q~IH?gXNO<3> z6+0_}(E4hrM1-!hB@g04M25UEGv0lhlCB_B>S`CNxQMEl(Rud zP5RG~PKsQa4qwu=L+O{z1ShD@P77^t9sEq{R}e~CybQprK-8rbWy;iRHsjd=S1ES$ zV>k2N*F|B+ETF(r@uVjx%l_nNA_2C zu1vYLm()EKPu}i}d_e+txZ~g6heT{hcKEVzup+)&);Ap{2eGX;vpDwh=TibTy(#`D zSUN?~*p-{xe6M*IM8M~qj``43ehojhcJ-{PsG7?6KAC_HFOV5OmeJ}KJob}n{@vk* ztEfWs>@>H^8$*0@Cm(s%Aq(61*h15MDeM;SuVi;&RnFI{L#&lFYcs(~bC9Ad*kggl z9qxwT^m~MzY4z$0YD`&4k*GpvI)(J{iE+wUWcwU;@{w{nC>n(&VZ}4~C^V{%-s~Zs zoJ5~nE#(MZL0G6|_bE}HE5jtQFn<7!mg*3drND)+Y-**4pSyr>n|X9U6$+8>zrbhqP=m{G zXs)ehSk$7rTvXZm$Rm>;lL9}{D7lsXTk(D;w!W>qIanm-%k=VHibhD1YU@BQt-`z^w=y z4|TP+AUh40ZhY8Ub52EfYxYOHXlg5@13&t>)ob6{p^+oWJ9`Z}wRti_)3)}0k5fQg zOgS1+ayRu{#`~c-)yA9q<;i`v%AWOqv*Hxo0#2ys&%D8y+QwO19EU*7Le%TOueW?0 z#;qQ`mb>k#MGWq`f&K)BgYlp^97f*+qX~$wP*bwb`KosJ@y)RPp0=A~t1m?bpiyTT}DV2Ma2>Dlt2;OFitFKS4o?ZLqn@gJLlJo7t5|FYLea@(tG}=`4&qB=kfS4+ zwP$~Jn@EFAb^6-wJOE$n*fQkz6msiD&>}!w;1Q+A360nquj!da#lS$V zs{p;Ud>J4IJabjO(3cN^(#LlGs1*UQv#4-A8ND;{P-010!}rxl+N%NnYw8#)o5`UU zn?is0Fc0|oI$RiT{k3_XV1US9cm~>x9aYH%mH85Z^o}%sl$h6T)ef~@Uo())BI*+pM3Zc8t{k64I}$DCH~IET&O8XTQS|Fz!6q}c(hv=nYzfUUIK$;Oe;M5%-JNY#$sVdG-BGga z0IL>_m!!xz6avVQ&;v8~1G#xK*(EMY3}21?Q{rD4o{@T?p@Y&B$`6L{+&&J~zCTpY ztsNwglmEyLb<)E+;A>PysJftuZAhpMY;N_`)%#1j>W=}WM?#H?v0v`Cm{V>6p&w$dX 
zRVJM%Lc8i@1TJjbe7AbI*Dr+6Ax__W(TSJ}l6qU$|MgT2pkY=;P-g00Jr z>$NAiwFY{3&7KnMJ5hxZh;*xKRf-DuScrN@O{);_;*Z)^Fr1H#3GcGoYOl##=t%Uv zH&5H>u+7msM#o-1CpimKBP+_+W8aHCC5HMHBp)b?6$>Jq;P>n`-=Ws63 zZX@m2_sqk-F}fWM(oG#IISkT*i#VlBLkeuvx4g6}q>T|~Zoi$kMcPS3)4G`kXK}Uq z5_2>62ghe+yur%;xjQf659W->;0=#j@@q(GImZB13-z&1VllGhu?HB*6n1G0eMi2` zuD*m&L+=<%HH!CW*T?a`TLmaC{`p?#7uoVWeg@epuLgc48;tGB@DP(~+`5vrvVHt* ztM_Al{dg^;{~1ohFucU}7=Y;2THjwNwHJTXv_`VwK{#>fsL$3LBRVhY30DVwIpF00 z_Kj0GOZA|NHe=A1_fDhol+2$Y_{HjR>Qr_m%e{D zNjet(cB?55g7@w<=9QCrDP2I-5U`@0EX&C6YMHSf(m;!*5TfUV;`y&XJUJ)S3i_r+ zd_Wh9YK16!*;fnCCYQJtf{QB&a9vYIS@#=zD>T7m-0N1Rx|@5wWPPW;Lz7i@y}@`Ew4VWhY+BH{ zS!kHO{gXLCV)bPe-tZT*m~4&-LGhrX;(;39A)Y8-3Y0)qODhpZHs4RD@W?fMe@&c{{<(6X z=z31wd)4ftVLAR2QPMs`k#R#HJV2GNSd;Ndn-lCHEzrfA@VAlwsd_2izN=#Uesoml za?WfFmA7}=QH5;3-worm@I_~B2i}-sdI-(GX_u~C?kMuNW*q2GG@~K+<>?4zW8Dy9 ztouRxXY16_I*(2JK{dWNg_LB1t=YWpIw1*`77zVK{+(%m+-h=nxMs{bAwsWD1FGQj z(j$L#da_rmbhtNc^yG@)r3Q_!es4D<-mbRArwO8hbpni-ffD6Q`51-}qm3FhMK*Fi9K3yq#=GkM)a=Lub zG%>{7))ETQ0a2*6QfH^Jg>fw3slwc%03iZ>%vFFnqDPKK&zBo3L6 z?z``~J|5iZ2M)14p;*>YX_?U9_%Xm*zWcaCXYK%!QrFMMzRSo@8gG&kn~<3pm+?lO z)IHr7`zr+cpmt5h-K+y|&2aIudZY(4{+_4aqujk76tDGOB%PG^va)lNIpbs;);j`> zj__kwQ?~N@+o=GK!g}@Br$>+SHBH20@Ha1mlLr*pE%wr1CXP@hbYztysJ&{8IK>KROv*$>ys-L**DDm(S#$7z3dB%&H3)kJ4L#*(Cc<^Z<+OQ z4p!4`qBf$(f!g%gMwe``Q4Qg2B?if%5lLPW*x1=IC z|Gb^#V`%9*Gx%IkY4gl z^w!9{nupZgoo>;bcgPZ4iQr1$Kpi!-qq46;Xg$T?JJ&TN>#C{nw@g)y*6ESb-q3V+ zx)k#$h_Yp*mz$eutJ;Ie@9OavCYF^2ACK&9DAwr3w#c{^V?dyTf|MGKznL}04p(zc zHG5Ia+8;vX&?AnT!0@A8 z7za*$y%J@%dVi5!6$M_M0&*auiq4PwMCH=1x>{A3agRzoT;jg5G}P(Hc$KJZtGAzV z*i}PuaQv%!pJoABX81~`dRvBI3)UeOnSa3h$fPsouf?5icYGa!zo{5jhiWvr)|V)} z5Wbo2Nzmx;9xd#)%=ADMUL7SDETFpEiKAqND1*D5&pgktHvK(V`LGs$*tbBY(8FKn z*3MZATN0ldaSna-^f-l^h0kHUPdI!?2P>XyGnXNr;SBVHG#Y0Zpl=k>xCome>|Qy= zQFzvu_2O>tsOC_^-s(Ak6M*4ch>x0Ho#TK9hDDM*at!!=BQg5M(mXmFBk`4GDKbl~ z#SiCaJJ9?;Y>u=(ipUHR*r#{2^{&wPBvc)wXR~gKyB>aBs*H=5{kzwk2#Bcew6L8g z3t+MRR>&Ug8yKo%m|3t?|GCv~g!|I6QFe+qm-Wkqj;S4EdAJ*80!xJL8dxf>qofpE 
zVS_R`YaKpWd*I&69532KK;NpMy43QA|H)mCJw?R-9Z`dq}58wVh60=OEPM6^}2`3Y|bj#dl_ef@KlK~pUYA&AZ!&(0=C ztVL+gpMu8_U#ly9F@bqoHOr=Ulpw3BZGsofgHPl;NW2YF!_r->%uwNiYjz-(d;q7v zp(EJSJ*rN2q~L9$Zxy|vXrLg=*1Htfw6jfMB6J6cnQjh-Q1#XgqY*@kLr3rl*)kaW z)j7&s!ZlC};|2Fz$sD~%6f7ZkcOWbmX26kODsGyrHF{!AFKwDe8|oGHd-<%Zz;EP0|J% zV3XyzU@GyUiGO28qgw|q+dIm!&^}F6jurY$ZL@Z8%{x2!s}lcd$ZNkE&iC9+Yf7yP ziQ==93+hUKyS7ZZj?(W>s_|8RtM8-tHfE_xzhqO~bu`we9h*W|Ty$%ISiem8rz~1! z$RQvfXml#Gc%0g85fWvC7oK*T2Pe>`)~E}$*n19lR%l6S9FvuU(2_<)$IP8f&lN3b=Fc5*WsBS>h08jprWRQmWsjL)mJoF%?r z6^-2il1=jp7fL-A8zA2b6g4A-CVjs`q`t*`-X90fFO?!LYW|)Oc>fKwA39p4Ueq3U zUQN4$Cm(*HSc>eeq1h8L?+tGM+(!1dPHDnF|2kVk`#UAnmp3`HH_~YOsCatUKDMd;2{3wxJ~cUWMv_vX#=!E+qgF$)PSf@d!v;b-_~OOZ1_q-yRF;x0OIGbg zV14>cWYEsJPeM*#G%)Ho_t?9$W$8}&?LPPsz8k0hF*FJ+vZMxze)2_$; zfEI1{&AK4gOmC(G2Y`G0g{NQE9|sfmymAmn>48rC=)H|N?8Q+o2l=e=JbZpC%c=5U zxf37FZk{OI-Qh_gy`4EEG*t@||?tD3Jo zH<+x8@(~rvPZwdwr)MNZRLo{Ss|RBn?)0osH3D?sih#T;v;2r+j@(lA`_uhdk3_xs zkG9_onr5~YR_I$kWhK!ENOl@xnkkj(p=x?yr`~36)vm2ex zow|ja0MhMS*9tGwiLv=iDt&1C(v=E%8MjF3-!EJZp>$hUd~@RUP6^wo_ zUze2#yHE51cr#N1-5o+GR<(@9Raj^aXnW2c8HC;hLkZl(7O1 zafN1S$J3ONsjb0L5FN{cIsyA426Z9mKUrJ{r%(2aRjAzA_c2AJH9YNm@t20}{o{S6 zi86n^gN5iSG&jSh;X@&kXx8`s?y&&Q{%*^6ah{ z+S;YfK|C+J_+SN4zdcfJ1*92$JLYQSc*e1J*hEjc@5s|zpP{Me(>Ru$FFgrPEUz!A z84c6XFRE>?pn0CCdZU)k^x4KOONHGB_=c{&ogC=du8^nbYyc;V@tj{B^0aHBq;jq| z!nD$9|Fk~1^=(ojSV^-OOF5oWj84qo%De6$`y=Q~wx8SFOne!`c%ZA)R!s0uhXh9N zPr3(jkd`|yNFBQ7ay;GUicLP_8l9p#GFhNmk6gylwMS?5-S5&aWWQn=<H;qbyiNQ1{8*eP5Dj59Uqm#m0}*7Xs`>umy=wd-LRVuGrXb@0@MDa6y#v zQRH^hFGBP*GVv4{WCTpri#%lTj!6G@g*Q|nN4x%ScZfb!ARl;D&Pg*6>JS?3z7z?l zos;5&t&U65KJ`j;|7k2ips`}Ne%5_-eo);5=@rmRl?&5^e4!r^8&j#|VJv)AE_CT| z#PukpsTZZw-fo!Z4 z*A#8WC!VGim(tr2e|V18df%_KU`#*Sq2l8;y?MOw%!g5o2r_kB833~NS|#-7VEliZ z^vMfI@ilZdiu;T`;-92ll2INOxBP7HsPY7vV-sm>P$7MG3xW3)F!jDqBu@xzid84M$v+zz7aE?6kOAF!$S*$v3=8X1I@BR>*FEf&(KPhwk zw}HGzlR-bqr|Nt?ZSsq%`IAuh5LPv99MB&U+)|p`zs`J3A6PO zCs!46=rO!jiYmfQmqwJ$NVo5BvCM6!CRpavrRABqgU6&T`A$4*6d;GrPr0xZe`#m3 
zwpBM}3IiEzHg%1}n(O}TvQ+nU(+J0TRQ#g-%M6RF8g#A6WgY$sfh*V<`jwi2hC`bX zNp%Eu#IkP!LQJCRu6FEYZLQAbndXgcC%;XlQ~6-aVGg2%P>A5zvm6eeS`?fcQ)St+ z)v6zK!y$q~+nd>28|=od&>Ygv#`*K?AkC&YdZcdvT>fzoD-*K;-qx*>Ue$pt@$Q7; zYUqt9P{$^;Ka@U9Vl%k(om)+?F}e$fE4Ll!&zM;$_W8bSt-gJvId#>dq#2deR6w6` z+fX}6sM`ox!BbJQg4;8N40_6_dfv z6(8TjVP68OqwW|60zL(P#k-faeqYUg<>x;5eU+iyEV}zqY%Y(>=I-6vvCk%tA%5Q; z`zU<^`TMFj0x0?S)h+M;T?!)|a%10RZ?o3=piLYAtGP$0lbHcyFT#<>5E>7Lo9=-L zp+Q?TVH ze@xf1R=wswMs>x6bV6tbun}UU@nsGkz{`=cfvOZu&>h zw|yrAQ_ZpdZa`D&gLcvhyJjwSEYF0s*mX#TNk}*?RjrQUHbXZAxSlTAJ+=ricD&nh zUN2NrS`}rXB8bW;S}b8sa$OtE=tPD;ma?=lvqf1M`89^ss0`LvYynbItPwU5B>quB z3Qhgeis#NTz+@eM?no9+V5>StZF{!q6SK0)z^`mvFr%c~n6!uLs$T%#9d#dA1vK4oDnaCCly3b*!{ zAAqF{;fLhR$d$iK-}T9VN~aLUB{l9c08Mb|@}$rdG~uvW$bh90-DKC*Wz6dX_@1?@ z5NCGJ0{;-Boyx4U!Hl6IX13!^?zQt%qxt%wS;< z3>9jESQ8Iv%&rTV!Vj=bLJ}4TwPY|7@Vbi`3RK_i=u*5NngSr_?_C+r|kdL!qD7-O2Gltz697w8x`A+G0)Y)<1)V-5!n6RVdlV^A*9 z$3Dz?VwLJL(Tpt)*BVQ}|0HldL)32>&60@hNy`RXn@Gx~0emel#*8wl00fXE9}qx_ zg%6p*|Z=&-57Qo#TO(4$tBuZ^91PJTyPe4(1&YNqqeFaOE9ON%` zgy=ybl_p*w%x?f;UZRdzBQDS6ZEV7#4hjwX#o26l&~K?yz%E-MTBb{p^DCJj0OOmq11W)%M|y`s^fCOL-2-0N+p|WITAU~ zu^kabw{Sji2|emLjd1lv9L1ZX?sPX{==_yMVxujh?DYQ_56VY@fer9sM02EPaz!N> zJWflY->SzLs#j6d&?clHX^FeBaP+NlTNhHH=S;{De23)vO#ciCXy)`66G;D>Tib0> z2hUf{&kg0cXnHp_*;cGZF>QfXw)CIQWA9K1ir!aak85w$%!T@u$*R`Vm2x;k)L*v1 zV5H@;nPbz)Se{8`^b9#;Q4zp^GPJha;g{^39`s^NW=eb$<~$kjq*9Y5z)Q>DcHKWP zK~mWFp6=J+g{hh!s7aXVG$xq`aod6CsdpX}ue#8F#YL;wSAt}1Q8ET>1b%$r=enBZ zOpu%fZRK+q?uhj|aHJ-CD7fj|hO_fV3nx&lj8rz>q4E?8++$r2*b@+T7il5S@W~^K+L)wvmu=Bcd3fS-KT zc=$NbP17U)lx96o25XSkotRYA)Ul?%ob83QjZGHj1FSoN#JvgvCbpZqEx^w$-CCQa zAk@@3vg?$-rCAuphFz6D5fB`Yf~|>wb`&Z}_|+lNsTsid>D`L_?9LEM2s^q2C?_@H$9E%?j%>vFA%V}$}upwJ^(rt z0`VA_v9jqXt722f+Jg}=`q&i@TMSEC z;fT=^rs9yC3n{YF7c{cb#5_we?XIr5y0}4Ubce>N`!XA@s#aB=Lysicn2|)(<=(M5 zjCTnx)f)ozE?|rUYA+H8c$PtKM%9u_m}VQAm>l<|vC54Y4+$4dk4ATdA9VdzF*9|c z6TK*?J`?+Y?biQ837O5Pgc4c0LBV|;|Q!Y(!iPoxWQ$fV>!~nWH)*?f#T5Uh0(h^ 
za1OisfAWb!(g+1(#?GN|Eaj;rGP|Z$Nv>XcktpX4^B*NsJ%En9vKuT_O{{$z%OApc zpqwg!ZJ<$De|mH|Nzv2d!|2gHd&;qtFoD+9FhOim!>0r&8Qu9j_6;n)2g$e`#%76+oOH| L(Y>-ePhS2vl3w>? diff --git a/profiler/advisor/img/jupyter_report.PNG b/profiler/advisor/img/jupyter_report.PNG deleted file mode 100644 index baa860a7893e1801337916aea37475ea69bbaf04..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 34097 zcmdSB2~-o=_BUEIiruY9_f`a1w@jJ0|4t=mj5*9sOJ{|;99HOufLp3@SjzMzPu2e2%GmT zyH`7we5CC6`-q?Kn>igfb-Mrivo4`+l+)9x?f1*JX9nIRZrOHYU*@LPb6c)lzpEX! z-C^DN+K>y*HCxUf+#dqmd699~<%0fiU$q(GufKEgyLQ36e(?Pt8nIkQRV9KNT7jaZuvA-{6ymimuMQ8K)@}Z1N~WAsT*%;r!MKJ>la|lSI(&} z0f%G6xV>sRT&54|EMIHJu%b0bx?N$NPEAMTV|vuhrVTSy^Ijkyu8o^xsOsi@)E|)F zfHMSDb)=A{mj;(zg@e!VeC!Vz)jj11E&^^wjKg^s91yYbQDE=bRhaTzHz3Q$iBP)NClj2KBr|Oh+~hBTFomxy6gNw}US3&W zc35fWl#PhD@1ZQgoodb&rAP)N*>TM4K1~bvN3C%p>lTW17%^i!(MwRr+C+{0Y7;r) zY=c!MkoGQ%1h9fjwVd~Evk`gpF{Y4YV|F&aq3VPW2#2&Yic{jj?}vq%=#4DH zh1bLslg8O_)LI|EC>PTDzgi89)_mA>Sy7HuU0zngGqaRSGCo1gpJ|K;5GHzW1yBhs z`foW)2wZtw=wrL(226Vk(UorcfY5f>6sDd}YZ+}xD=(!5Y#3RZCK8LgXu0#FIWym& zC{OZ+ehhX}-g{Xx+-8WAN~>D?qYQy5bgHNAJ2CwRIuyCr_Oqe2Y_{z}fi{ z+)~M)JZ`MwN;pc~`7657(%)F}B<^0!i# zU#PoU;ai;~6R#$bx{LV-)syvwK^c7OG*9s1JYUL?9uEtAd16JoGd0(>QxonhVd_!X z4LS_xV2``MP?bXobFI9`X={|x%+`ZbNLBg#yA56!cqO;m-v!}hX*e5u+~8B~RNqui zrDC2cr3kb^9;hT-E}Pwhlpm1|@tk}-Uej0xH2AvY2!9*xRR1@Q{&zSV#R?ZicR)&g zMA-V+$|;7PlYu}XVI6yGgRv{T{oEcmW;Pzct^%d>F{UTQa+#Dxp-!(|f;P!8w# zrNDnC{V37{$B-Re2rIT$b37XaJy4hvwTiih^3oc(nhQ70bF)RW<9rfC`5ZrwaPMM` z@76B189U{@O_}XTYjM9P_jp68_CIrzVF}FL#P_wMrKFL~0gtG;J{B83h#n9{_r62J zGGrciDLX$iGL-DEl0ye04xI~QAJsfJPr6(ffTSR_ycJ4c<*Q$$z7y>=D36oa=to64 zMJrC@WkI)AbLoka^n-J!qL&u=Ggj#R92J{$c4KvnNcU$eooKuuHXzDcD)X(W@-&Up ze5lrJX8k<;!Pshdv}b|M@$JS|d8Tz=?p?lIZl|64c-~)^;XbY`et_Hv_R7)MbbL0T z8^smXrAJ7G@>rtAz9i4jEWfeMW6CcZRt3-1K9ScezDbQgjw{~zMMtllcU>+i(fIc2 zboS~c`62J)O?|*lSSaj>bOYf12p7xp&U!cyDXno)rK}7?Z}=x;K)rxew~vmd!MeKi z-5X6hkxTBlUG}JwtUVJ{$Z=~Eli1Vg@8C~ah14nS)ManoISj(Lh63*bZ%!BG$7}wO 
zw6^f+h!X~{%C>)t$l9iUi~ydo&c?2zG2LQ*w8SK{%NMw@G44eX!p0MRu(RkL;p;#7 z2us@O@*nRn!f96dCDz#0ll2&UC+?bO+()gM{IlA3wG7cZkGP&-wY zR?JaL$UY|!_pJasg4Xj*K9lr66wS&rynbiCx3X6s$jE&3yM@MQ#myV~fV`Z`-2~DC za}O#SL#XH2wor~L||gu&pFpX zH3v*OkV4z!b1jE56Vw^2%wH!wv0l7bx5J4*(s;?neQl$5yt0SbxWAEWDA>%7D9{rq zz+!94C6-!C!+xhU4Wj``l6wDDtmeb6+ylW?c;dk^?^)7ArM0@1^R*dH-rlkXT9wBV z0=vQGH9sRs4N|C5>08KsgVj4zc{L=2+zcG2UZc+IsSn18rATlZ=a@XY78x?ZiDRCn zB|i1JiHsi%st7M|uP1Ay7`N45a64ZJiZ6HYiTENzOMA9*uh=0jOQG7<;~l%82e$Uy4st8Q%|H+!xGs1c!-6#xK{SO_TI zT{;1fWH9*Z?@Cs#`XaXizdu!PbjIv?SVF+oV{%vSeHmc^Jaw!5W+ZFn6qt6?bUwqk z(ir2cy4kx7J;nD{9zSQe0zNl9RJSw>djTOSn2=I~S%%+Gw;;5$JwF2a&cZot)hEKY zG6|V_)?!tYs8Ddy%y{LVQ`fap*X^*l|8>0o)P|LW#{MQ}7{%W-+39GmZU%Tz!N7d;;Q5{@< z>bvj%>$uyQu9f=l7roc|P4hq6xu6Mwi2iGZ~xC}wj)Gy#9e1d&xk4xew#%H*`Ie9;T-To@_%dnX4vK5^nq!A zC2<4e2^vdiuxwnskJ1L8=7hc_rc_qX#DCd9zsPdZjdb~M zBIsO~Dtk!t8(`R-2T;aUECu49Q({REJ{HeAnZT`~P8fYljdzJa@*a#C>@Wxwo?#@T z&c^JfDzmE&@oLZJ+MTk0pkFySXU?g3C%Cm>SzW~tnDVkVJ~X#c_eqPyGR>uDI**|BN#OAOnRSA<;_K(0#&b*VJTeW zXwh#~M+84sZ{!x(7L7SZ<+xucfEuHcMlMiGdGjg?f|$E0DLlgK9xufN z=g)Z-_9ez_TQqk6XW;#&UZy2Qq%V8)T&59uqfkH)Dcygcwn0xNo%;cSWA7EBKGxhuEuom{pnVN%aGco@60h_< zHB-kZLdhv4H)7xH5;|a7VCJq&it)^?Cv-*I3yq@JbKGe**2Fh*E=^<;q|6zRb}2hv ziUs-Mwnc3`IJV#k$GSlrLr+x8FS=I!$a}?wqfleZy)AmI@lo|?(g1tRu=9OBGZ6*M z9Q&-D%I?~21yp`xR;Ws4`@_`>ma}3jl^)Y@$tA^|qo0u(R8Cz-OfU~9Q5$YyM) zW|orVvZzexLqDmAM?l4I7Z!B&GLEuxJ(dy?zh3CAex}t-Hd6fd_2{GdSk-{0PG>Ro z`y$7)IJWSUYjC^jOoAl$a{b+~Cxf0R!>47gEuy}kOsZ$^(?1G3`12L9Np2XP_2QxP z0^vd$cZvI87Iv>4_FyWJ4Lh1{{SHG3)xt)~-&KtMp-KPnE+CN&$RZbH4dCA``Kq|oOp{5hEqaolOv$$NPhwp6tLHBe1c(BMTmVDeo zqkgpcrO_gN*ClI6GLHCphfeUSx_HJ>gFwl(9V?!p3J1g<^dTKBHAvl@%g+A#5Ll*P|M(w&}m@ zj|nRj+*z>EQ|@zG(o9k)N}SZ)fuTKM(9iY}b%}i2P4HycTGZamK#9z_JtmoJS;w=> zT1?D+W?O`L;k2K?QPD0>CS=gF_;NFUzNoO^&|S4}HmlAuv9Y@AqiD!OY|vzGJ1nXD zf=r`D;R}2U<}%%;>xf9}K>A>wwfk#TfBaY=n|XfejA>I5f%BGmKA0d9G#u4!bh?2I z=8I07Fctpqlfjcbd1xESE3vX_i=AX!?(EZcdY}hAA*66jIF*zrF|vg*=VI<7%-qnN zKJFn-py)#D%LaeU`aW4U{ZvK6Cv`xXNcG;aB1M{wS6p`FMJewEUb^Eh6{>4>oS26g 
zF{Hwuo6OFXGNROZmr_6-#}YotC(uzQ@T|Wx#hI6cT68vt678h&fj`?Ri$78G_rhWA zk9cGU@}T*JetuKy#B>}zWDONwyxmaoMK~rmE#h6_y&6XI#H>8)OAh;|<0dRiGvj7b z>>3IeUk^9Nr?onIC?0V4kHY;8dyHj$) zkt5~?GH{{(n3=ZF>crWJ%#tGOAVpiFQbcef&N5pDY-6*|S{X8jr%N5u7UOzSI(bqg z9Nz-oun_z_NYYZ3X<2i@k2$O$FGU4xH1K5k|>85;D|i+OXx1#V#TyYf{ZcHXe^3 zp3k%H>9U)(=5|%p!A{!29X|1KU;Uu_wEQjB@~qx_W6r6PhQhKRkY)(0ugbP1lxrN* zoj@qcOUlEG6?-PEJ#I>b>tLb0+Kj+*DN=80`l}Sn!tmHn?eYC4+2Mk~8JqSS9~vi- zN3H2>&!wv;hde=BnC!~7#MzC+{0U24+~KLl;xnQN7K)*t6y8rHcg3a*)iE!|c#bhMha4V4=-G7FtXt}hXn&*l8noyz|dYwrb z_M~vS;#-S&a|1Qm9z|C%YKK|U-kPkvh@)spE==lKW`6ljFzzqIba6^u9KCl-?Ua7LS4oO!@Dtz$tHJ}$`(Bj4|`EoQxt*e=?&-+7a4=Mj-y zGw)}~DnAfC^Al?ewj#d2&AEdXM4IRUs#<&|#Ly?PX;46`h(P}O zW%Zw;&p^Tg6mcuOIJj^b^!*~xlcLKszT7$s(2WCpdjdr zN0awX)Fy6V&mYOBYm1O3aQD4My!1ZyY-~x!(YZAH)d;>UZUw9RMc7jZNN@ zGeqX&o0fjkPL1`CQL1J;dY!uFBrM46rxQ*5VQo4as#DghzZ*DXb=FjMr#>f+f?7>Ra(QzLSCJ{mWr1Wa+FT5dR;gygFJQDz~)UzGJ#Ob84k5KwYJaJuPiNI zLmT*BJDBB64+24+c#2545(sk##dqfpq|CJ;=mF;cSj2L48vQR z=*f6w-5KS&bn4^qx`?*0w4uh6bz@q8^d&;^%ZxZyZZeQ4-905EKHV#Ffcu3$(_?IyaELliFp3wDY8x>nr>V zYE!I}*q&(2LyeDSShEmdBdZk8U7FwE)}8WF z<74Rl0)*xdXKNf%{6a)Ddybi96vW4rIIvP0X}yTNt`X5>Kx=k$lz=oi`JT4!fP6sG9wx zsK8C)740hOYr^h!gW#x4F}IL8J}JG7Sx+$!hZ9Ag+G8hlnp$%bQ=T%;PFU*U;SGeY zD&D-`Ysf9LQ?|Pi8q`g_aUW)L7=cT1wp7*ULaX756f>ETJ9F9uVSuJkhp2jm!oxM) z27y^aHg^a~d!leObNcuMF-Kjuowsl4^Hi~84VT#6CHlDiY%yh~hru5Ry|^%4ihXfJ zTp+&JG*`}RwEV3?1jAujynY-uoqw?qliy~JFJkD@tFi(LNkJF}+L|A=H-X~AJ?+#l zH_H1NZseYR?Y6oa<2tyxquMmf18-g7EDnjL`9m^lH6J2UQC>P$z;EPU_C$R0R}ULHC5v;ouO1$)t4QE{S`w<4_&RnKFibWkyz5@0 z`Zr`gg?UN6rd+LaPPHh>@=&Bz^EuXh+iWBQ*Bn~X(Agi0R#14Qik858bM^ei5s{(P zUhRO!lfgzZ*`5K#)oV83&^tY-mo!y;hKx9QdG?9l#Yy>W5oH?S*bU(+gQ)P4y?+%tD_~++R+$o_-8fUQ0*YnezAZNpW3{ruK^}^H;=Y zbaGr~8MwsPwkcOV<(f+j#Po|Z1?`E0%(lI3-a|{rh0g}=q_C)&Wmg&+ z3)$yZyR`UL=^(5jK0PRy{vAH3iAoBW@SgFy9qAEc%2>94I(OE(`V7mYNOUD;iejXz zYB=8MPA|2-7|6-kK;{%0Z>xG^o2J#=_@1-7)vO!X*}q4v)vT1j#f?=z9@CSdOEvbn z3wL~OB*k(Lw_D$ft}kFVj&4F7Zma6czd%uqbw-V7M`1J{A`48@nRNkiXnQvQk(P4= 
zZtYgUL)bVl(fHD700>DrOY}jJ5ibZIfsn1beBq+hI<^220d&-`Fe$M`qEl5w@`Cu! z%HE|nFU6SYuKfAWGmV)nV&9d-LRj`0j!s3)4@Zn2e~zFCUGwFmuXo8@rm7RU^w-ZNDj=0(agUMDuDt2NuGryLpMFf{Y8fyJtma_)DY97F$iE z5lbFL&16O;CHL^c)%E!%Ok0q|Bfs_!9-L_muyd>&lCLWW4bA^4@U)4&Z5^-Sz9ed| z0UqN|V#_zL!_#F})8iS7nXet!&(5xcb5x{oGnQu(Y*6%^Y%fXaOX~W3i4ap47CLiJ z)MdglND@09pDDqtKSLi5D~WMvld>mmRppgP&&*fNC-o?=;^!QB?4ZUronl3oHKpczKpC!Q@=lbKP z_?HP3L7%#&Co1AjqPV^j&M0Jt-&?E=OjiXlo~S-gS2Bx?a7G9^yKTUU#rxCEQR_+I z16zDo6W z)SKR+%HZRg4}+dwI_7W+g?_H_(Q@d!$wH*y&O=Z6#qK20i>#(7UDRO_y#e)NJpXrF zsyj?o3}7C;^^c|?3yasJi&=IM&$ zJFj47{;p!HUM7NcN0y*Sd#J7w>mbQpJZm~)M@o1+uT#1q%K#xYNGH8d(V_iGx3V#E zn-Ybq&Ycxdlfpc-u%}Cz?o?)vC^uYiqgI7!D1^l;7`+3Df6|Z5p=!EjEc;&|P`sZ+ zxSx6g?`%mfLNG#K-??Zoq3AF?VV6x~vT^txVVEOB#SPsbb$e>|72}!RaFh23rz@r}(U9z)##eR;UoosZuQ6SN*Y)yV#YI$0@%;}V7$J&D zf~J8%k~x)9IAMFK^nBP=R?=d%UVs>pVR^;E24^Z{SLob}e z@kJkxV45GWAE4w1jPWIB$ZFjT93|D2Oa_D33Y zHZ?H3#R<}+kVpsJumc_f-*%Eal3OwRG_c12L77#C{*;5sk6L$z@n&xe1407!AhwuQ z%0%^GoLEr!)`?s48&8Db_HLC9Mpp;Er^Gi1?pJW#ngp$ksO%|DaWs2@*STo>AlS*M zj(gf>f+FvW>36KbB=OBJN}ynzqVXh{{io=+RKVYw~tVwf{s?6{d3|vF|dgQ;wD*pNW+vtfP38)%x~EuD~Ux2ntaVn zsd!zorZxB+jJz8kUXH7^Kn08xjl!gk07_(y`esex(S+B|L;4sJ0m z_Sb0(wPbtVzu4Ho{bWa${@ri$|6&F4RKRo5^g)$O@ z4>nRiX6orZ-FvSiBVt>A&MsPmNN@EkaR$zs`J+(b6z%{eDhk{!fUX!*^=tbHpJD=U zBmJb`YdCoFm&COXqwUCD@}p3AyA#e-%2+5=k@5H6-(`D7-DiK$RdxB;(OSpKoM^6t zGcQhRKJ0y9(6kkC%2>_zqlUb=b@ROh%9>yy~=w$zLf! 
zK>x7j$`!!spUXTt=m`7aFYXC2;Jo#(D=)7sbL*hvoaUv!J~V6f{PiJK`|1B0FM9Eu zkvwr=bgn_*pYblR@N!{SdDng(c{e1vKVXEJd=x3}qyD3^VXl6DD2CyUUuKk*o9Y%Z zY5VE*o;+^^J+L1D5;PK+4a0_}ltoJy&q6zI$K_n=3y5&i_ecY1u1QkMc`bo>xW7l7 z0~nSt_ojC=H%p-7#aWV^9{#?lPK3M817$^njq>|ym~te}MF>Ltv~bx6pQhjcFRxM{ zVKQeK-=}GRfkR>yX8Sck@^tC^B8p;}%%y_3w@jqSaj<|grf4}pRdsz=>A;p?>Zu_Kj|G#{9>1i%5GV(K z8fC#SDK<$oxS1N;e3Cl9minDPxWr5|CL~v3p6<;ChZriqyCp?asOd$1?`T!K=~|{= zjEDH}t|qsp2}|NPR560OsVHf#Zk`HC!S!ZIgY9p1l+8NVKp6}Sl#L_R7?vbl0gaQEkp#=xdD^kh3)E+HYw)Hvma#8)%8q@sd7%Bjxg6B0-jj5EyOPbYLh8Vw!ELL;s!7s+}(-xbs5YeqRVTI9`!GEBGleN(HcatIF z>ilPBQ;qLLkGq+PlX>Li$4B#r;ERV`4vd^2ES-FPq0~t-@aiVrfSOsyFU~Lf8S2)# z;1HmrpnkVs!l?IXtmSFUwjpKN+7|DUvbIWB)cQ64aiXx&8si9u@750uq{ME8yi)wH zjep=8_EuNSh5S|pQy(%1b-;O@a@Ru% z3KqxTK~f}bEWtcoyycbrIKjccjbqsl$RMqrnh6Hgs==?+n|%C^ztATm8$@098T5cL zIW!}hBTm;$8mt}Ct4SXlzrUb&vGNA&b+mTs5prF(+vy=DV!2>|+^A>v%I@EY8kmDEaCO*tr=CcIkk}0#{ zeLp?(X&l=`0sT3h^rgPrI1Ho+y7Xr znV@VVoWyq(x@(TqPgm~AzC~8Ku_IfwXfnT>|pAFH@F)`*l{NAAqi zI8>UieH>yWV1 zw?qWLaoZBeG_;3CgkbgH$nwgaDheunEIJHj>);y4EUd?b_>H3J3D>B~Htw3?#QRI#F3Skryz0O!Eg_AvnWOzre(r4kN&24l${t^n?n{sh@jR|c9J7AKs^)D__t$N`3@8`Oz-XZiayY}gA44>5g4tXrj7L$6 zi9{olK8W2WTa(;Clls2Eiy-fV-R%G>MSV|VmMP=rpfPU2I0M9uXu_UPEE>YGjlL7w zsDwk}_kKeXnjX@x16-zMdE+yCA(?=YWN2pi4NaBonuLEJQAxaks!TQ02gI>Sa_?~9 zoz`!2zS$vwY!w6prs@=4r;vpWygBe!abEy59!UaJSCvLZ$;H2g`>@zI; z&&_V^@1E!VK*LmM9jw1oCN#ebfe(AGJ?9&a@;+>BFj2-E9n2D}9pzW|-TtOYC7$nq z1|*rAqo$4`1ad~-)eM?qk?A5_A*{WcMR1l*2`{BCBS4o?s-)upW21D@>zQ`yPOHsE z5QluMNQrk?I4m4^P^NI!nh;4Tx; zlc41#j%(`(+1_6u$^LJUWIrJ{hJC}S9V4U>sS`%mxF-h730P6vr&!%h2b746`*WZpF12Z5Mb#sldA3P_BF zsk7jOTny69? 
z2H^5g${~&pNBz-k1%_UpjS}iw3w1`+19+A1@fUbptaN7Z=Llns72#7#edD)~WFQwJsgJS+SacLlIr|R*4NUiG+}1d5f+{sSy;nVZMUN_@ zjs9$w=q6KfK-Efb5=KA%jW2H^;FwU^{c_y1_a4^6Cmr)-{U>O%R|3XhDn~uiBn%vx zE4%4|AA(IlWk9KVU;Pc|GZPpMoL-0;JLR?P#ejSd&2w=f5FGv~!Y1m_2_+oM7Ou2( zuJ_X4oqI>7s%9gZC3D{r8bw(wiEASCWNKf@U#N8XB?DGM6qoi^WVhXD`s8%1e_XaL z)f?J8tQ`N}Hd~Sc+n62y!76-zk@go?@_*(1*Z+A(`0`*8W~y|)SySiQGD6eOO^6U0 zEGuCk5n2iMY}b8HfOa#NFN@yAWbl^pRdW~Qw0(aR?v{H<58AjwS5mznY5*?}u6RRW zz;yYsUstxOsk4v&$G(VvYJ*+Mn;+9dZkQ|GvBojpJ%JrWzUErq_SsR87*#tFYe~|s zks1irrV$$tIQV-wV%u*;zPS>pS!YsEmR(+G2SL>N_X8MJ+^LqCMx8(DMi&_a+CKeA z={Uor$Olcap_K8KCv4btEui_^!P0UrjKYr_r3tp6G~NaIqfdCN@g7$%%ln|XdO?$k zU*uxb<$!wM%^aLNnN8YpkvC)~ah+SIaLwODI)9AeMXk(Gb3LFX3OMHo?Yv+7GiFaF zd}~DA3~iKV=D)3T6Bxy2UPl=1*>T1NypMujYKPft;6JvRyMVZ~^~DX}W`Tb7JMAZ32EJ zJo0xx*)*YiHu(cfxlV6kTB}*60y8e4Of1-$RzN@$72RGGf2CpX4ZI%3hpO?i!x$Lb z1hTc>+=`Ti%=J50$3~PakLe4u6`iSQ!m@AX$6RoY=mSnrS&nq@as1J8^Or^1sY;bH zZA1Q7E-~lgD@7tfkwWIX+?8o}P2k-1u%sx5VG#&bBQYCGlwjnBb9W$k0~1y35Y_v1 z5`#4#mWO<8K&hxL0Y?YTc}}Y%FI#~pm51oiR04qcKmJl`xs6eVgf@$@!_A{7g|8uG z^fo@lKL1(7*#oB*{Z)S?sN7PkM&RI+rdj$1n$0u9dMM8DmluTwJUW6VTyO$uo4|~3 zC4I)46H*b90Tuc@aT~4o`OOw+Gl&)H3iM|*S%PfK?~FJQQ%sB(s{Vzu{>g+ zBIfkCu=}seHuzJesf`F15&I>ASM`CrJh;Dm-Z+%BzEf?FUKlBgoXi~5(=pUhnA#b2 z{i%cQVA{uoAqSu`Q|Ei(oT!~Mw!U74-_XrIm{83OaL>;?y=xTcXGZSW-&%9rh}je< zuZHYD)#K%n)n6($c6w1oZzD%qjmt_+5*8;SM+!^IZ_QTUj~Pt&#;EHdji2(^+hKiO z{dB;Ir12eXhch|E-$E#la@CLEVjr?ZgZbjqClWQD(R66PbC7wTt-5i+q6KQ*qKZF;eY|=2TorUyPC;LchD{ za#8-^LSe!j<>D`4HW%Rcm(@QViqS|s1MR2!5iW}-d_0;C;&LA+)}(@E`7L~Xoma&T zNpptCg>O=gFHRScwVF2&zNIdu;|84wYpG^R79j;`qnZyJ#$nFZ+8(&wOQ5o$0NTr{ zfJY9y;?0ELr2hW%vvf1DcG01#BKar&l>^Dd)h z4Mk-}r3I>!3DZ<2WE~ABudB13kipU(zc;Xq(ec2nj4yF_!N-S2KklXNkK%Yk`;0x0 zUZujM=7<{q%41fVXXuvhF$PhCb9Q~U*-4Yu-?gGr{`_ra11+|uq#Ec0aprbzCr%P4 z7{v(kXj{<3UHd2@PLX@{jK21v9Hn zYA4~0iA{;2vIO*%Tt0&{w~S^iE5;K3+$hX1mxNvtI_?^iemgFD_cmGNb~OM|_!YJf z2q`Xy3fLYin*!+vw69||sO6@UB|c&XaT12WVZ+#7tKKCB|0Y>>mD2%`i7A&CQK_JD zciji4VXct2Fm2DHW_iZ<_Q;;djR7qQ=X8^Gp|BE7*~8j<^Oh*OCe{@3asz8<89bF2 
zvw*`Ty;{!YTlU311OI?l#=LLtU!i*$-Db6gqoOn)I%Ia0Y#2-2q_{-nSQy`%IMn;k zo9))xnFIfPZ?L??zsTjU{vykxD6ME zop<8V!>e|dAUc-;m+uB>ej?E@m61_$yC{1_WgVcD51Z_t607=d~bCkEUETPE$UzX_I=ycx;>+=JsYRmiHcU@ z0Dkr2!qn1JfOC1>J8`fm|FH@RTh`7#dYoLx+1*C6#``G3Oqhdcb0YH?BvPt)|4HYr z;XvHZI1LIDcz}pi5JIB68~1_wnxZ6iiI1Q}JGDxN#BMz=tn;{ZYVUrzUs2LF2j#gq zH0TQ7q-MPFNy|`+TwMHBU65P8>-E#GE!-IO-wCaiWwpr90;$;$XIk#yw1SZUfY-fy z(?8MWAx--6(|)HClmEgJK-UxR-9HFUbthZ@53bEnhk!_&t$^?&)wqRBK^Om=FVMr5Pv!`dS#8#*UPnoE`I}_3)eS<+K{rVp-MgA)Zt{V ztQW32@vkDa(^ub}>Ds-rF(V}vBpvwI^}BuUFUDW&Uq-Kw|EoBz$_auZ=b(#!-*s1f z3qlN8CYJ#R*~rzE#N0HLd2<}3lMh4{M05Yn@wSgI|FVKaS{Zl5A#JEmJ7_~!;vREP z`QEL1P&8o&;uCNav{>V%W47wi-~C@0{F8GihG+GshYhb{pJ!r5s|yq! zX^St@h!jwyHxMXF-1)I+`*3Tl^8@9f_6_)JYhcPFS*}Lydw*JKX5p)@Ox1eyR9Pex z@A27SAG#p8=^f9N_IqC_SkS$bv8;mGUm<89;Ye!8;&fHrr}o*WY?;RS9vu^}WGAIy zPv-Tt>$l5u^;_=fT*yBA!0QHDz}6p5ag8Wii2=M{y7)m5={1yaZY@n|{I-)=zt1nO zEYGiX1M*u!{q%{^#j9REI=Ax|?E~(0$6%My*xR)&*RR&?KDy;rui$9^r8{hXq<5%( zRKfad`LK!|#y1)CQ>}3b`lYLXE17jms~HN(8ZDPzdY|BqMMuf6_rMael|Q1EJHB|R zorkk-v@lWo$oV=$n<7+&o^wmd314n?SWOKH=U$=(4K2noqQ6Y4-zEc_wkI7F3(_c` zuFq5r5Rk=LK5PdB#_xm!;boWwAN_9o6P^JMW~6kqDGF}T__%w~*fx^ed{>WfyS8r7 zGhTHWWRuLugYX-rHQ@y}^0o_WGZOoRw3tGfL8g7$#hz9F8mGq^k^E;lr;r}i3GxKX zgR0guO>OJdBlk%6V|@Fc!p>-I1rEuBeL!r&c5>nAIq?1XdBM2xTUBo2Uq&)rFv=PI zz|l_oww4s>X!hRIWsFWlg-_6`EIBRN>~%Vw|*J*%C> z5x31atW1%+ah_^)?U%PNW%dEn>9V`!q&4ZiLB+_|*kOHuHUCX&M9)|X8wjcCz0G>R z*&EvKa)U6M0j$0 zaG^*^^M%lEU^0F(!ZBU=)zKwGb2oKCxtGIHkG}1`6B~f2K~0w* z8^Qo6!E%!tLr!Pr3U@=5`Z@Fgi=AWUDCDDt{5Q;2-K_vE<_rvdJotSDj1jC(NM_&X z?n42up0(#Nkd&h&>Z=;_fbLpor?YPm%JO1-lmchXk+TAfkLZwxb=2^-ENsYNrB<_^ z=e2gJzmaG1j|aR~jP63`fm!LtG8-q-Lpz^J>oZD%yhnjNsq*x-(r_Avesbj>PTm{A zcD_LI44+m~7WaD=*7fNA_^ygSuzN4+V7ETBplHnhS}SkubveKMI}uf*$35Y~ko~7< z0&bu0$-1deVMnKV=idsSLH20ZnZAv0F5U{vjwxULf!$3`4$fzPcEK2|{d+ozzDcd~ z+i>m(v~EIEC*BfJYMfgc@xhVX>h(E$7HjTp#8it!{7qO0at}$|MLU|MDL7oAKv62a z&M2zL7Ccy<&7_zJr<7y7LZADcc#Aec2lS&DWdD1wG*ud`zKimx%p;Od zL*1$^N5FAXwBUQKlTU2%*T4rQqfD^1@vO*v0XTWZQ~p7jvZE@oKEtT5EwcZ#cW= 
zw+7bT22HOps^`~-sOjU*l{x-j8HSdWAT%z&WoQ3V5M8rQHBNH+ zJUyr9(uCKq5gQjzR2B}dYkU%GYE`(bs-W-kZHDyaAtZs(HHX6!a+fwTzD2)YQS-0i z;xJn8=z0R{raS6(I5UCweK99EvP(%{JY4G zGv*C9DG;K*{nK6j_aT5hgd-Mc7Ja4Rbu^L}f^N?A-!0fqE$6S3aQ}Vzfq4AuE)BEt z`8VD(CDgQO3MolRzhr*T*R46aFkuj;!VcL}_2wWBZ>4N<~abMB!n_V#U0=N(Sn2CNJ3Ha>o#s0f zLE83vGq1$(BR7z9naBHpeZ)dL|op6B50#IsFkJ6YVH}vt*p&?)nu}Xw@ zUycZSiBxlh&Adwp%9$=ZT)sw$HSq@N(Y1~bTUOKFiq}`z8HeRtw?f4fLzl9kf{67Y zE-j%trF{zElG=4Z$%936Uomi6e*FL*7N{GzRoEEMTA$;5+yZVIojqNC^Y>^dAiCIa00Fw z86Pza!)a|fgp>S|N*P_(eo^Q|;I zp{{a5&>`G%2M6euRi+Pf64&BVLE3pFFT;H>p%V198J=A8v49^>LX)+3dXIhzg1qIZ zg0TFKW_X3;2w(O?#__U|WMcJJ-PB{ zl_b>s{MvbU$yk@`HP2--2lwslCOPMJDBtD8M;Ldeyt?V^(>;}9gG!+@3Z~9t+$G~x zQ&JE)Rp=PT)56is%_1;K_HumlYCy8oAIb5V;27o#Ws+pL*T}cS@;F+Z_~fI%1I>kC zH5zO*D|^pXkoFfCFClH_!)&fzlg*mS2m%p;%1^q0GkP<~$0V^oxs?rA7nRH+h@SigU4*kL|qM^;;y0OvA_KsLLKW114xCJBOB>vsl*QM$6fFp${6* zFn+uJ+yHtIdNN(*t|=v5L+o!O;CaCR#<4vkBqv;yXKbR|i8H;|+Buj+$Y8(ndtccq z(cWUepSNV~3iWMe89Av~Kj)^6lm#uPZ19GZxsBk89k^cAceN($RLQv)uI~whfU8*P zv^x~Bj*@#X4mHCI4IUQ2K784hZHYQ4aakO}=(fF8=G(bm-a5!vpc*k#u-$OhE`UVs zgZ9dPYGH{+t&A2N+PY-B*1M29e?QydOd&y~^Q0YbxUO+Vw=TQ`ux73;d|d`2x=32b z$5W6)Y!`aVUP5~%KwH$fuaByaPyR)BJr>W1(<6;zJUy(Yc%i;?7Dhd?U7~|bWku+j zZIHipP@soqz4AzlaxcR94M(d`LL42w)2OfmQ2A9_Nu_3Fpp~oRXH3Nh_1+$Omn6a9s zxk(6twFY4ga^)!lm-RLhXiCxVcH2vP)_Kj;Ggyr5XFfN0k@MPyw7&rHva4iw<|}3! 
z+3#vq{#ZA$TjgGotq^^JsX`vT3;a0B){KgsEioYP1GCPqWwR7{f0gDRYB1P2lDzW* z$lR85>?aM5@Zkj~)#|$7Q?*y0-~{?89Zpf}omkv;zoB*%zLmXzv#q+J%EI)P zqWRM{Prz9x0{$!9bMveMC-aRftWW4cFHC9|?H>Fe(VGDMe|{QY3xPrNzdY{e_0IgB z@1N}whXsyn5pPzKH-m%sKR@38;uroqBPJDbRjm=NcQa?mst-ek9#dF|OIu(at}V%9 zHA8)<^+4xHe9De{OyOs1_}P*7jW|&i5$1tL=_b?73;@hBEQG@|Z5cT6l4nmsR0Fz@ z=+5ijmms++uIz;f$TEk;f-YmJs@d&9kEc}?7_7;yyy2=l-{H!{EyhKW5MS-OI=g!Q zUiO4-ug{9w1OU@nC$No)@pi=?CAR3A=*K=b_FQ#En~@O2szU+{gS3lVNyMbgH&C)3zC50I(%nRaP*hsexTqiA5XWf?~c3?v0udN-k>$YDT&FWWM8Oouuk z^10d{?aWhVwTX_0uIlnf|2iEY11e?fo)gH|H}?;Sb@QLr?WM|sTfAcfe@Lp1Y>f(XD2yFK{d8oq2DnMIm z!DjWA$A)*OT{N*e7pv?s$DA8MxO3-|huy-h4SB=qSKkZ_*{&`F;T{B0j2!l-$`2-s z*dDdY(*zL3tbbMk$K>p5b^tqGNiUN20cG-DO+nc7`SphqMFt`rziF?WcIkC~kWCj| z{jTbc9gJEEfSFG%nBpRbSjE%pVl@Or*yBP`Bq%z0aEo}BObSHU@*xx6lvW79mS`n1?4CkNh#vib(tj}b-MP6G+-$bR2n*UHaLuo-HQqZHy3Zn z7uj)|ZDvG3__!-Vf%s~P$gT;d9Y>kP9O$5^qCKDW=&dtv_`+%#i-uOqnlE>t>x-cy z%~llwNjy!Hbr|m1+~f}O&SItJC?a$WmeXfyq*x@k5N z1F6WUh%x5c1mNX}=7>tg2=-Bl#a-&wuVS8Cw93TC(oI33mRkT{{N9d?S7<(Z6wQ}d zd|9f@-LILG_g*tIiu(Li9&GZ^#qH(|&$kqnsTWv_D_>cen==-#u>db(n4?ka7eW_$)`pVG@@0^m z^2ek#{1>@Pf32lu&k6HacN=o3A&Fn?w8-VRUD)?!N1Tj1NVSEVa-r5xU+U) zcd*Zc$TeKIOT3%5LIbiUl5I7~f zDmgsDif4v{+bZiD2KkU-t4SW+j{=0!o!|X@D1wE(J>$_@T6P5uonX+ zT-GBOsCerS5DB>^1ns&rB~zx*&$c|JJTh%HG}(~1YZoG}rZl1-)utgmETH{m#$-&p zt=M=Zf9z=o8I?Lwbe{m`SP6Gy*|t=|Ru+%87lQPzmmSRjWw5VUMh6UnR4;H;rlCa~ zXNxJyrX5Wyb}icH@f^pM;^j&x69i-*wbCOQJgt``qUvdqG)&~wPQEuUvOGj7CauWU|vcF)ege+je*#z1a!J!% zT3$hDtG5AC{;a=!GG{&Omn&qitdb*o;^#^DfnEhNiR$y1kg)ETgQOQGBS^6^qbMa9 z?&$mIGKr0ofcI)>7fI`rNtHOd(YxHjt_iZ0Q(7yAk(;bnuAz-gI zbprW+ok@XVO>+l^BeHt!Nr*o;t9IGSCs@Htj+_yCLcLYT>@zeje91-g&}IgV8IT`- zy4GOkx12^5;ZY>~>|Gls>AqbY9jIg(*86T~!1*1+%SI{qi1x>oT<$t1SUZm6qASScVVU?E2_qC1b~7H*2?@X&~z=m)OZ%__OQ zipv+77o`+hcs|n%phfOBxT~uHnb_dQe8ySpH(Wl+z1-uW$G4-8_&yiihcxKy6Qzq?k{)8L8Tj@G1VlA=m7T>(JHoa_q6`H^)eE`{PZT^F`4C zJZXW+_|N#63BlwJihk78*JPiC8l??!)}PQjR;`HVIo2_aSIMe5A2;vjmJgbK?L7r6 zuZzkOX~aNXl_^Ei3?IpYs3@N{p~C3)Z?=(RoZzEAQA_A7sP$G1tT+VYg+vMNTBqkB 
zUs(X|A#xB?PISeVh<5GFv9BNc#9w<)Id(`pb;+tA=`@Zxu_-^`25R?WqL=PGxRzm%D2G+W%_(rOzY zSJ2?f>z_Twy__Vb)%nxj{&VjJD)i?yKUvW<`*cB03y}0VfTX=biVs^X8Z1ms)zlcI z?r{HY^G=U(V=wGhIw32MXy4+8Um8f#kDAQA9rk3Vuq?G}{s#AY?iOFn7qv_iSDB_3 z>B8t6nuHUt;?Z5uP08fLPQR!fU^SQEp3qkS?<|iL>0*`;WdBQLpewi~cd?QEQ0kf@>FOgct#f(bqY*Jz zbMC|D1Cxg7-1Rtb2|#6{W?zt*_pncFAf@ zl_6Q>%aL6`iqnW)DhuXJmvU~wbzYd->m682EmE@Le;*{V`DB?~FY6mym!!-isHLws z3?Ez~%>33{N)N%sNXQ@_c=sd5gyY0LJg=QvJ_-Qv$p*S@Z)h&xd0HLB#xvkuP2 z=}3k<)GwM>rew3|zObMeK@trU>-Rl8!1c=9O(*}jYPV!4rgjB6Q~}>hCSnG|og7)x`~mJlF$}cgY~H|1MMD4n(o>s^kSemmj45(gJu^w)r9j7#1S50lQg#- zBBJ+UF2ft`%T&g*uq7Y0yyw(Y=B?0p%mENSwscAVn9$L|-D_0A(fTUS9PQ}LnOdANOU^3kaHz)*K$C!CQxlU6BqCsKvOn0wnN%mbTK>Ej|( zy34}NBA4oX+LvSCg$Q^q`3-5sSP}-nlY)F+iPPo$$+H4fC-IF?De(gH7Gc}2XGzs{ zH#8G-)|S+Ixf*p2J--Pp;^c;mx5|CP<3BWqj%LlpM+x?G-5fkJ)ECrj>LPVnz;23E zm03&6N)(C{@JVokgbC8U=L-gOjjPh^0o{X(Y-N;>B~IFg7Ot+EZ8NE*)|{tO+pT2P z?=CKtMlAZB2!2%h_JEu!Cg5pk2=c|&`K0V|n;W=|MWsOE6=@RkP9{6XNw7PaV!)DX zSS@dN&??nuFs7GX(?D~>-6kzYE)T@yp-k)g`Ino;bYPr}s3q!|A6RFPmTn zN=fnCR{%_qh+F{)zxHpNUy+CWx>lJQW!F!G7VgTUTCN9nw*YYg{DQxu^#21Gv`GX1 zg=QcyDF2Ogx)D;aX)yViT?w!uI0-koGB678^FI+G{&B=alP%@B7*1%0a7Lb6FuJ)nShh`A0Zr*Mh0Lre6DRDisJV&j&!?Xo?yuKQAOe zn->*r7&>>MFkg(*u5UBF0G#3Os1&|Eq2X?k?QX5rDc?DIdy_VZb5 zCi^qUEq90_fdMa*|Dc^%jF}QyVU6RYSt+(VswmJ^Ftx%BH%qF1?d;`badpFzRZ_K# z65#+$<0-?cGzTDPNQCatsaEv&)->NrFsE0MJy7u6}7zH2E~| zGRCjHev6Bk!KLLW97%j;w^?2W6P&vNwOLotz*1wG6;JR2Bx8ZPq_q6GW@x1$kC$bd zlbb;Y#bc}SdxmaH&hCgsbo?%ASV~D59OY9q|CpdZ=(j(jG z&$CdnCFHg1&PJi5e#f|bdxw3j{}Q*w^8PC4pCZLdl_;WWa6FWr zL{R(IzpK(}t2mXv|An(?30IXz$8)_+$nuh7ZZ0~%$#Q{3Df>riRk3+2i_$#hgC=Orr&H!unb3YDtW;4QbF^a zSbAXys;SH}dY^zXa)rJ+OqjuIcV|ooRPRBcfb_%*l zZf?rtHmF$9vTQtdV_^ka0=7Va-}K8bx6IJ3ezJ7%f51X44-$?Jupy-H@3PkVEK)gdO8h6Rl8gRKKXy- zr7GbFlWZs1BEF#reKOc4PcP+tas`VHDfFIk&*`zDDoeT>J#o_Y`3orK%+GjH@Gx1j zSx}p@IA!{aqQp)XuGd6hyrqQW;fyAk`j`kpTDsekTE-9!7d7g1qQayIl;>(O!f0cXLKGy9uSJemleb8@=NwUtr ziem^LZi-wAm=&IKdyqdUCFf&Zud>Mvf<*3D80IUT$F`t~=zz2hQD=gsB*fqX_)Tnhm>-8)THl^iGS+1@bzby3mCrG?%{weaYFNmq8 
zv0D4dyO#_)4R3POC%ik33USdXdWWYT#Ej~$2b9{iX<)jRwUazS(wvxOaaaaJ$LG}7 z15Hc2t@PLz!Fs;;Q6X8eL^r?qDALtrE-jOX?lHN}H%B5@YD@gxIpmM0<*xmEfJ$OS zAYw@ORyjfkaI$*8C`s$EA_=1!eOqUPhsqI3B8E~4(=QT{kv$EV(j%vrmBjWIpwMqHJ zCk^CyVSg(3(vcXqK+bCp!>F^lv@D`r(DW6Rd=m6Zh!>PIv(ztn>rbqt($ho@F#8_9 zDfEykB+5PK@K|j2B22>%te!%JC{Nd-egyJQp>Hx7(m~D^ z@(9F^&iv__KC z|FyT_A7RCRY@Ud6-~ZKNc<^9>##MU|h$L@-L4xZ;iQwY@+ir}nBks)tFK^OhMgl|T zz^!SX1mVg`bJ)&pegD7j)mTqa4|NIXsIVN;tbbHiE!Q~=e&O3RTaHkJp12!>*w>W^ z(37>_^VK`Ddhh8C56<+X%-LSS7|knsf(=<6IT>njylB*d$EAN3JFWkKCGk^x+WTV_L?nLRfM`*n1*r{og71E}9j884v0F z%+$e`>>&+ah#mAgXz6jyuW=ia2a`?oYk4trS28wY4XaMqY4&= zI>ACz@o7H_F~=6W|HI#*_R?(Guh zxo407P|Ry)&$I^Efl&DAzehtdBqvCC)^dK}(@7$>x>S23egL!o@~HUs0^X-bgdn>H z%;qEUm%LRBC=8)S^-uYAZdeK@|Ggsji_shF;bSXiu4#g=NN8xBKSPc;d_Ao+7J+E> zPu^s(vI9hTCM5fg_bhX1ojTX4!37lgAyg-PtN%iY8-2QUY(YTLeouPXyYrHO)>&C4)M5$GdEP7*SLfV)@{`A?WB5OS}65M5E_t_|2F z>_5fSveM&B7pD7w7OKbpTAPE}zeon*ptn_~Y@x3E=%Y;Hyeif(=AePCxW2CZwoJ(}NM#DOGR1CdyA|H8qzET7LYu zC@B}g5_;{*C9<{2LqoQ9(2StN;^<8kFRZlw52Hdk=>SF{gi`*8yK< z^LjGvjTh&&ZBZLW?;0z5m;KRbOU`tL+Jj9=7;rWzyyAm6SsOs3#Sv$A!5Y1%P`W1x zv`57o4Yu2-6?~sE=ulRCz?aIvCrEnvFGkXUQr8h%SOkz;NFzHx1+_ zGdYeYxIohqk>uPr;LTDhW7fMXseM`pM1buSup>KfosB-{T%#cJqtu zkRy+_qvH6tjxn)5Eag$9OQ2m}srIZ-AaL%ac0ZemF|u@5m|b*D0&ff3SBsL5SYB0% z=lyjgK&W2jh21`0;TDiBtrvqZmszSk>=ih^&Ikby@{3aEkhIreUcK}VT|Wi%1CG#f zvpVk8d0}&k=0PCT{X2XT?f`Fic6timttk7PiB$!5?L-$-ReNOUw(oh9A|$Ru!qz~& zxxD~LYD1x*-K^YtDs8dusCs46jAu*!D|G&#`s{E($HcjUmaTUCMq?a3Ij;r7n>ob( zL`lfYwJV*Ze)ghmy7rWz~pu?tF9Ig8*#YUC+YL^>V&B|q?7S~X{T%ew*Rc4OL zI`LOz(Sw|*V(fUbKwMeGD05NAv-D#*lrlJ0wNk(SDI<(PV46^V&9b8&oJusWS7=AV^kcGwTJmkH6=FdO3s!pv1ueM?u}a4OZ%~tOA(a z70M$A(WWbvJqPmdl4dJZ%7>p8+RKB(2MRy6iAa-ypO32k8o+M|0>saJ6!Xp*|0{78 zUMGe7yl*lGE8@1vkwV7>||?F0>%{e_*~{0(ren*Gqx$h9D51Z9^=M{U8xqRLU?QB53Z z!z;`hP%5fDcYzR%`UhK57daMCfp~{E91q!S=uAUevhn%Y2p-`0)Z7 z6CKWUwF9bMhUuNlwO5ShWQjS#Wm)4kYP(9XZ-l-F<4X0sIqRSJI{|jCydZ2}gN&wP z7c_3}&^zi1$c{f8w6a;?GS@Q9GSP-dkwx)<@+>)bI*Y?&wWlI*7F$K5PeIA;fHp9A&Kn8FK8?_~%bj@tX 
ztiY-_Z@7M2xzph{*Ez5GJrD;TtHoJ5T3$_ME4wE}A|Bli-K;DA^a1;=?ySuXYK&U% z^zkL3Q!BR`5;pb>-97(!Vx;HU?Xdix!Nsa3b0S;xhDub)0z6H$pwGXnNyitrhd(Fs zT=+#z4;!3~SpJ@KJ{E%Gju=nJ=brD=QSJF5`&UXucI{(;Z&Wtlm+|yLRPV zL2yoQK?IkUa^9JyyUJe|rAH*E3({c?mbPmytB6FAHoz}(4L7^f3|mke`P0F4Oit3x z9*TTbRe~ZVsom>)Anl<-I zW3>hY+NzM(L6&}v*Ej0J=%V4RQrSW1CLf{*vvvAgaB2b!`ce_-a;11#ghX|Ep+Fu$1-lw`6uI z4!k-TG-EMc13V}=Lx0~to5TJWl>&beUcM?=tWN|$*$+(z)%Xsr7j7n*kP8@4cs=ZM z0_yPWyTXmJ52(uh$V&F)TqL47z75b*V7qU6J>0-vPhn+>!T%!bA--z{)Fs@v*`)m8 zR8%d+^V?~@tgui(|7tXKi_ZP?Vkmiy=)flL9jrqQ z%23#@;=NH6RatanW}e9PDR{lYj`LKsQp};~C(F#K2cpKCjPgHvPY0i2xwK|xId@14 zkp*{(DhbGZ905l9M@d9?>QqppR>^Bni56}cybYg|v zW@PZ|)_H^yL+GYS@~i+Vm_(o$=1a7$$ycjM`q^)6VW5Onlq?floI|pI?$+J+_6FxQ z*X1X0=mO@pyOHBGqBv*^X|m#3GVHw8Y_i(RRi!&MU%>H$(DyV0u0|mc?ch!ForN3p zWJ-G7rz&_YZW}8iB$Y(^P>mwY7Uu&6efE1FjZ=mgU?{MZ$}}jh4#A|9P;QjW{uaSJ z*9Vz7kh=;x_nJ`ALCtkS%HCgt(Zjw0F0Jy5l~Ru#ajKu8o6hD$CvGQo!5-DkA(dqX@jQ#4j8`Zb(~>$ms~YgCbhOE1 zxKw+-#MdHuc^!~DDn!AkK}jI`14rJ?Xgy1RCZm4Dc=xZo@eZ2y^O{z9Jq`=Pd`U_Q zzmMpXU|S&`$_6cs%=K<%2iWj0MftYpENv5lCOR1mg-^tGuCpnF$g@v~X{vd_#_9cI ztL52@DVoXF4KH#t|3>GaD0o6Zu->Y@8#O+;!R%yH+HZ&CO6juj1aN3#63N1dg8u!W z%5dz`Nn|IzjgNkQ|MAg+QSoD?dbh#rBT)nAgw(fx4Kg~s{B5L&6~CbnEvd&W?)B81 zC9qUu+8cqPQjNefvTPfsk9jqitdHwf?V2e7*PXua25Jpy8KJL1n6XM4DCr}l7IC!p z`l&Hbxq}pZ!^nYbd7{!#J&f%@)pSc;P;Sy3{I(pEpc&|CfA%^X&@P>UdYsqOvTlMm zUFgJKAAoMIr)jn8^t|KJ8gt*<=hy9NTqFcKdzkB5V!Kt!>Q zwgEbi?3g1u>V_uyb|u(rWk7^|Dw&ZG`?7WhrcA^&w%%m_gHI!O2X{?4$beU z_rOyp5C;G)I{X2n8n1(BB9JN-ta4GS9C zQGe?G7WA~7)9())`vktjM^{DfKbje_=*pSizY$C>7`Kt#lV2K(KKLruy&K$M`B-tJ z9q5e$7E8kZ=Kup)?Y0uA(-2yo8GWL1a8MrneE;o$x3tZI10##b#V^6!Ay7r#DayFm)%S5if zL&IpnMts>h?)eKp6OLuDx(%Wxfj5?Ass^I8e~^8)?)qAGhF<6V$;t3DNuQNO7QOV1QG-unCDcjU%U!Tq4rBFytGEKb z`Tx7Jw`sU_!XS7%dX5BqAybn#Lw|HfFi>Y)nXlVjM*J2g3rc@Gm15SgN&Lc}{=e*? 
eUblT{W0&d5Yu16Ir2_E9E@特别说明:通信(Uncovered Communication Time)和空闲(Free Time)耗时会受profiling性能膨胀的影响,以L0 + NPU采集的profiling为准。" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "36b7a24cc7ca5da2", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:38.379699800Z", - "start_time": "2023-11-21T12:53:38.363755900Z" - }, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [ - "# 数据准备 EDIT THE PROFILING DATA PATH\n", - "profiling_path = \"YOUR PATH\"\n", - "# 若您有GPU上采集到的性能数据,可将NPU的性能数据与GPU之间进行对比,分析性能差距。输入GPU的性能数据路径\n", - "gpu_profiling_path = \"\" #默认为空,若有则可填写\n", - "interface = Interface(profiling_path)" - ] - }, - { - "cell_type": "markdown", - "id": "cf832ac2e0dfa30f", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "source": [ - "## 1) 性能拆解分析" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "40aac93278dd6e34", - "metadata": { - "ExecuteTime": { - "end_time": "2023-11-21T12:53:41.815599700Z", - "start_time": "2023-11-21T12:53:41.783393700Z" - }, - "jupyter": { - "outputs_hidden": false - }, - "scrolled": false - }, - "outputs": [], - "source": [ - "print(\"Start performance analysis, please wait...\")\n", - "dataset = interface.get_data('overall', 'summary', base_collection_path=gpu_profiling_path)\n", - "data = dataset.get('data', {}) or {}\n", - "bottleneck = dataset.get('bottleneck', {}) or {}\n", - "print(\"Performance analysis is complete, you can edit the data to show what you want.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3f353506", - "metadata": {}, - "outputs": [], - "source": [ - "# 等待性能分析完成后再查看数据" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "cd3fceda-49f0-439f-9c54-cc31490fc99e", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The Model E2E Time is 9.352s.\n", - " --Computing Time is 6.273s\n", - " --Uncovered Communication Time is 0.464s\n", - " --Free Time is 2.615s\n" - ] - } - ], - "source": [ - "# 饼图展示计算、通信、空闲耗时的占比\n", - "overall_data = data.get(\"overall_data\", {})\n", - "plt.figure(figsize=(6, 6)) #设置饼图大小\n", - "plt.pie(x=overall_data.values(), labels=overall_data.keys(), explode=[0.01]*len(overall_data), autopct=\"%1.1f%%\")\n", - "plt.title(\"Model Profiling Time Distribution\")\n", - "plt.show()\n", - "print(bottleneck.get(\"overall_data\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "6a1d82fb-a31b-49ab-a859-6d4bb898c512", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Computing Time Subtype Duration(s) Duration Ratio Kernel Number\n", - "0 Cube Time 3.956 63.06% 584\n", - "1 Vector Time 1.994 31.79% 5224\n", - "\n", - "Computing Time is 6.273s\n", - " if you want more detailed advice please go to compute_perf_analysis.ipynb\n" - ] - } - ], - "source": [ - "# 展示计算细分耗时,NPU开启level1或level2,aic_metric设为PipeUtilization\n", - "compute_time = data.get(\"computing\", {})\n", - "print(pd.DataFrame(compute_time))\n", - "print(\"\\n\", bottleneck.get(\"computing\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "35df1f13", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []\n", - "\n" - ] - } - ], - "source": [ - "# 展示通信细分耗时,通信耗时受profiling性能膨胀的影响,以L0 + NPU采集的profiling为准\n", - "communication_time = data.get(\"communication\", {})\n", - "print(pd.DataFrame(communication_time))\n", - "print(\"\\n\", bottleneck.get(\"communication\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "c5e6034e", - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Free Time Subtype Duration(s) Duration Ratio Kernel Number\n", - "0 SDMA Time 0.073 2.79% 852\n", - "\n", - "Free Time is 2.615s\n", - " if you want more detailed advice please go to timeline_perf_analysis.ipynb\n" - ] - } - ], - "source": [ - "# 展示空闲细分耗时,该耗时受profiling性能膨胀的影响,以L0 + NPU采集的profiling为准\n", - "free_time = data.get(\"free\", {})\n", - "print(pd.DataFrame(free_time))\n", - "print(\"\\n\", bottleneck.get(\"free\", \"\"))" - ] - }, - { - "cell_type": "markdown", - "id": "3511befaff513e8e", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "source": [ - "## 2)有对标的GPU数据" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2a1e617d2a117125", - "metadata": { - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------------------------------------------------------------------------------------------------------------+\n", - "| Model Profiling Time Distribution |\n", - "+-----+----------------+------------------+----------------+------------------------------+-----------+----------+\n", - "| | Cube Time(Num) | Vector Time(Num) | Computing Time | Uncovered Communication Time | Free Time | E2E Time |\n", - "+-----+----------------+------------------+----------------+------------------------------+-----------+----------+\n", - "| GPU | 3.149s(582) | 1.346s(3433) | 4.748s | 0.024s | 0.051s | 4.840s |\n", - "| NPU | 3.956s(584) | 1.994s(5224) | 6.273s | 0.464s | 2.615s | 9.352s |\n", - "+-----+----------------+------------------+----------------+------------------------------+-----------+----------+\n" - ] - } - ], - "source": [ - "# 有可对比的GPU数据情况下,展示比对结果\n", - "from prettytable import PrettyTable\n", - "comparison_result = data.get(\"comparison_result\", {})\n", - "if not comparison_result:\n", - " print(\"Invalid comparison data, you need to set the 
gpu_profiling_path.\")\n", - "if comparison_result:\n", - " for sheet_name, data in comparison_result.items():\n", - " if data.get(\"rows\", []):\n", - " table = PrettyTable()\n", - " table.title = sheet_name\n", - " table.field_names = data.get(\"headers\", [])\n", - " for row in data.get(\"rows\", []):\n", - " table.add_row(row)\n", - " print(table)\n", - " print(bottleneck.get(\"comparison_result\", \"\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d968851", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/profiler/advisor/result/__init__.py b/profiler/advisor/result/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/result/item.py b/profiler/advisor/result/item.py new file mode 100644 index 0000000000..500d32c987 --- /dev/null +++ b/profiler/advisor/result/item.py @@ -0,0 +1,61 @@ +class OptimizeItem: + + def __init__(self, problem, description, suggestion): + self.problem = problem + self.description = description + self.suggestion = suggestion + + @property + def data(self): + format_suggestions = [] + for index, suggesion in enumerate(self.suggestion): + format_suggestions.append(f"{index + 1}. 
{suggesion}") + suggestion_str = "\n".join(format_suggestions) + return [self.problem, self.description, suggestion_str] + + @property + def headers(self): + return ["problem", "description", "suggestion"] + + +class StatisticsItem: + def __init__(self, total_task_duration, task_duration, count, income=None): + self.total_task_duration = total_task_duration + self.task_duration = task_duration + self.count = count + self.income = income + if not isinstance(task_duration, str): + self.task_duration_ratio = round(task_duration / total_task_duration, 4) if total_task_duration != 0 else 0 + else: + self.task_duration_ratio = "" + + @property + def data(self): + + def _cal_ratio(divisor, dividend): + if divisor and dividend != 0: + return divisor, round(divisor / dividend, 4) + else: + return "", "" + + income, income_ratio = _cal_ratio(self.income, self.total_task_duration) + return [self.count, self.total_task_duration, self.task_duration_ratio, income, income_ratio] + + @property + def headers(self): + return ["problem count", "total_time(us)", "time ratio", "income(us)", "income ratio"] + + +class OptimizeRecord: + + def __init__(self, optimization_item, statistics_item=None) -> None: + self.optimization_item = optimization_item + self.statistics_item = statistics_item or StatisticsItem("", "", "") + + @property + def data(self): + return self.optimization_item.data + self.statistics_item.data + + @property + def headers(self): + return self.optimization_item.headers + self.statistics_item.headers diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py new file mode 100644 index 0000000000..308db61231 --- /dev/null +++ b/profiler/advisor/result/result.py @@ -0,0 +1,201 @@ +import json +import os +import stat +from textwrap import fill +from collections import OrderedDict + +import click +import xlsxwriter +from prettytable import ALL, PrettyTable + +from profiler.advisor.common import constant as const +from profiler.advisor.utils.utils 
class ResultWriter:
    """Write analysis sheets into an xlsx workbook created at ``result_path``."""

    def __init__(self, result_path=None):
        self.result_path = result_path
        self.workbook = xlsxwriter.Workbook(result_path)

        self.header_format = None
        self.data_cell_format = None
        self._init_header_format()
        self._init_data_cell_format()

    def _init_header_format(self):
        """Register the cell format used for the header row."""
        header_style = {
            "bold": True,
            "color": "#FFFFFF",
            "bg_color": "#187498",
            "align": "center",
            "border": 1,
            "font_name": "Arial",
        }
        self.header_format = self.workbook.add_format(header_style)

    def _init_data_cell_format(self):
        """Register the cell format used for every data cell."""
        cell_style = {
            "bold": False,
            "align": "left",
            "valign": "top",
            "border": 1,
            "font_name": "Arial",
            'text_wrap': True
        }
        self.data_cell_format = self.workbook.add_format(cell_style)

    def add_data(self, sheet_name, headers, data_list):
        """Add a worksheet: headers go to row 0, data rows start at row 1."""
        worksheet = self.workbook.add_worksheet(sheet_name)

        for column, title in enumerate(headers or ()):
            worksheet.write(0, column, title, self.header_format)

        for row_number, cells in enumerate(data_list or (), start=1):
            for column, cell in enumerate(cells):
                worksheet.write(row_number, column, cell, self.data_cell_format)

        worksheet.autofit()

    def save(self):
        """Close the workbook; a failure is logged instead of raised."""
        try:
            self.workbook.close()
        except Exception as error:
            logger.error("Failed to save analysis results, reason is %s", error)
@singleton
class OptimizeResult:
    """Collects optimization records and emits them to xlsx, the tune-op file and the terminal."""

    def __init__(self):
        self.result_writer = ResultWriter(Config().analysis_result_file)
        self.sheet_recorder = SheetRecoder()
        self.page_dict = False
        self._tune_op_list = []

    def add_tune_op_list(self, tune_op_list) -> None:
        """
        add tune op name to tune op list
        :param tune_op_list: tune op name list to be added
        :return: None
        """
        recorded = self._tune_op_list
        for op_name in tune_op_list:
            if op_name in recorded:
                continue
            recorded.append(op_name)

    def add(self, overview_item):
        """Record one overview item in the "problems" sheet and queue it for terminal output."""
        sheet_name = "problems"
        headers, data = overview_item.headers, overview_item.data

        recorder = self.sheet_recorder
        recorder.add_headers(sheet_name, headers)
        recorder.add_data(sheet_name, data)

        TerminalResult().add(overview_item.optimization_item.data)
        self.page_dict = True

    def add_detail(self, sheet_name, headers=None, detail=None):
        """Record optional headers and/or one detail row for ``sheet_name``."""
        recorder = self.sheet_recorder
        if headers:
            recorder.add_headers(sheet_name, headers)
        if detail:
            recorder.add_data(sheet_name, detail)
        self.page_dict = True

    def show(self):
        """Write every recorded sheet, save the workbook and tune-op list, then print the table."""
        writer = self.result_writer
        for sheet_name, content in self.sheet_recorder.sheet_data.items():
            writer.add_data(sheet_name, content.get("headers"), content.get("data"))
        writer.save()
        self._save_op_file_list()
        TerminalResult().print()

    def _save_op_file_list(self) -> None:
        """Dump the collected tune op names as JSON; nothing is written when the list is empty."""
        if not self._tune_op_list:
            return
        tune_ops_file = Config().tune_ops_file
        payload = {"tune_ops_name": self._tune_op_list}
        open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        # Owner read/write only.
        file_mode = stat.S_IWUSR | stat.S_IRUSR
        try:
            with os.fdopen(os.open(tune_ops_file, open_flags, file_mode),
                           'w', encoding="utf-8") as op_tune_file:
                json.dump(payload, op_tune_file)
        except OSError as error:
            logger.error("Dump op_list to %s failed, %s", tune_ops_file, error)
            return
        logger.info("Save tune op name list to %s", tune_ops_file)
output to screen + """ + + def __init__(self): + self.width, _ = self.get_terminal_size() + if self.width is None: + self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"]) + else: + self.table = PrettyTable(["No.", "Problem", "Description", "Suggestion"], + max_table_width=max(self.width - 20, 180)) + self.table.hrules = ALL + self.result_list = [] + + @staticmethod + def get_terminal_size(): + try: + width, height = os.get_terminal_size() + except OSError: + width, height = None, None + return width, height + + def add(self, result_str): + """ + add a result str + """ + self.result_list.append(result_str) + + def print(self): + """ + print screen result with format table + """ + table_row_cnt = 0 + for result in self.result_list: + table_row_cnt += 1 + result[1] = fill(result[1], width=40) + result[2] = fill(result[2], width=40) + self.table.add_row([table_row_cnt] + result) + self.table.align = "l" + + if table_row_cnt > 0: + click.echo(self.table) + else: + click.echo(click.style(const.SKIP_ANALYZE_PROMPT, fg='red')) diff --git a/profiler/advisor/rules/__init__.py b/profiler/advisor/rules/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/rules/timeline_fusion_ops.yaml b/profiler/advisor/rules/timeline_fusion_ops.yaml new file mode 100644 index 0000000000..764dd5d50f --- /dev/null +++ b/profiler/advisor/rules/timeline_fusion_ops.yaml @@ -0,0 +1,59 @@ +- cann_version: 6.3.RC2 + torch_version: 1.11.0 + unique_id: 0 + operator_rules: + aten: + add: + torch_npu.npu_confusion_transpose: ["(permute|transpose)-(contiguous){0,1}-(reshape|view)", + "(reshape|view)-(contiguous){0,1}-(permute|transpose)"] + torch_npu.fast_gelu: [gelu] + torch_npu.npu_linear: [linear] + torch_npu.npu_mish: [mish] + torch_npu.contrib.module.Mish: [mish] + torch_npu.npu_scaled_masked_softmax: [ "softmax-(mul){0,1}-(masked_fill_|add)" ] + torch_npu.npu_silu: [ silu, mul-sigmoid, sigmoid-mul ] + torch_npu.contrib.module.SiLU: [ silu, 
mul-sigmoid, sigmoid-mul ] + optimizer.clip_grad_norm_fused_: [add-reciprocal-mul] + Optimizer: + add: + torch_npu.optim.NpuFusedAdamW: [AdamW.step] + torch_npu.optim.NpuFusedSGD: [SGD.step] + torch_npu.optim.NpuFusedAdadelta: [Adadelta.step] + torch_npu.optim.NpuFusedLamb: [Lamb.step] + torch_npu.optim.NpuFusedAdamP: [AdamP.step] + torch_npu.optim.NpuFusedBertAdam: [BertAdam.step] + torch_npu.optim.NpuFusedRMSprop: [RMSprop.step] + torch_npu.optim.NpuFusedRMSpropTF: [RMSpropTF.step] + torch_npu.optim.NpuFusedAdam: [Adam.step] + + +- cann_version: 7.0.RC1 + torch_version: [1.11.0,2.1.0] + unique_id: 1 + inherit_unique_id: 0 + operator_rules: + aten: + add: + torch_npu.npu_fusion_attention: ["matmul-(add){0,1}-(mul){0,1}-(masked_fill_|add){0,1}-softmax-(dropout){0,1}-matmul"] + torch_npu.npu_rotary_mul: ["(chunk|slice)-neg-cat-(mul){0,2}-add"] + +- cann_version: 7.0.0 + torch_version: [1.11.0, 2.1.0] + unique_id: 2 + inherit_unique_id: 1 + operator_rules: + aten: + add: + torch_npu.npu_rms_norm: ["(pow){0,1}-(mean){0,1}-(add){0,1}-rsqrt-mul-(type_as){0,1}"] + torch_npu.npu_swiglu: [ "(slice|chunk)-silu-mul", "(slice|chunk)-mul-silu", + "(slice|chunk)-sigmoid-mul-mul", "(slice|chunk)-mul-sigmoid-mul", + "(slice|chunk)-mul-mul-sigmoid" ] + +- cann_version: 8.0.0 + torch_version: [1.11.0, 2.1.0] + unique_id: 3 + inherit_unique_id: 2 + operator_rules: + aten: + add: + torch_npu.npu_geglu: ["(slice|chunk)-gelu-mul", "(slice|chunk)-mul-gelu"] \ No newline at end of file diff --git a/profiler/advisor/timeline_perf_analysis.ipynb b/profiler/advisor/timeline_perf_analysis.ipynb deleted file mode 100644 index 34233db6fe..0000000000 --- a/profiler/advisor/timeline_perf_analysis.ipynb +++ /dev/null @@ -1,163 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from advisor_backend.interface import Interface\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "# Timeline调优分析\n", - "\n", - "## 1. Timeline分析的数据准备\n", - "我们当前支持Ascend PyTorch Profiler方式采集后的ascend_pt目录,并支持单独分析ascend_pt/ASCEND_PROFILER_OUTPUT目录下的trace_view.json文件。\n", - "\n", - "## 2. Timeline分析解决的问题\n", - "当前支持的功能:\n", - "1) 识别当前可选择的NPU亲和优化器。\n", - "2) 分析算子调度瓶颈。" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# EDIT THE PROFILING DATA PATH\n", - "timeline_path = \"[YOUR PATH]\"\n", - "interface = Interface(timeline_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1)亲和优化器识别" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: [YOUR PATH]\n", - "['Optimizer.step#AdamW.step']\n", - "You can choose torch_npu.optim.NpuFusedAdamW to replace the current Optimizer: Optimizer.step#AdamW.step.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('timeline', 'optimizer')\n", - "# 打印当前使用的优化器\n", - "data = dataset.get('data')\n", - "print(data)\n", - "\n", - "# 如果使用了原生优化器,则打印优化建议\n", - "advice = dataset.get('advice')\n", - "print(advice)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2)算子调度分析\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] Start to analyse the target file: [YOUR PATH]\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NPU Utilication: 71.33%, NPU Free Utilization: 28.67%.\n", - "Device synchronize 5 times, try to reduce synchronization statements to alleviate the bottleneck of operator delivery.\n", - "There are too many small operators, you can increase the batch size appropriately.\n" - ] - } - ], - "source": [ - "dataset = interface.get_data('timeline', 'op_schedule')\n", - "data = dataset.get(\"data\")\n", - "import math\n", - "op_dur = [math.log(i + 1) for i in data[0]]\n", - "op_free = [math.log(i + 1) for i in data[1]]\n", - "x = [i for i in range(len(op_dur))]\n", - "fig = plt.figure(figsize=(15, 8))\n", - "plt.plot(x, op_dur, c='r', ls='-', label='op duration')\n", - "plt.plot(x, op_free, c='g', ls='-', label='op wait')\n", - "\n", - "plt.xlabel('operator')\n", - "plt.ylabel('log(time + 1)')\n", - "plt.title('Op Schedule')\n", - "plt.legend(loc='upper right')\n", - "plt.show()\n", - "\n", - "print(dataset.get('bottleneck'))\n", - "print(dataset.get('advice'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/profiler/advisor/utils/__init__.py b/profiler/advisor/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/utils/log.py b/profiler/advisor/utils/log.py new file mode 100644 index 0000000000..1ca111592f --- /dev/null +++ b/profiler/advisor/utils/log.py @@ -0,0 +1,63 @@ +""" +log module +""" 
def init_logger(ctx, param, debug_mode) -> logging.Logger:
    """Configure the root logger; usable as a click option callback.

    Two stream handlers are installed: a console handler for regular records
    (level taken from ``get_log_level()``) and an output handler that only emits
    records of the custom ``OUT`` level as bare messages.

    :param ctx: click context; only ``resilient_parsing`` is read
    :param param: click parameter, unused
    :param debug_mode: when truthy (and not resilient parsing) the console format
        also contains file name and line number
    :return: the configured root logger
    """
    logging.logThreads = False
    logging.logMultiprocessing = False
    logging.logProcesses = False

    out_level = 60  # custom level above CRITICAL (50), registered as 'OUT' below

    class LevelFilter(logging.Filter):
        """
        level filter, drops records logged with level OUT
        """

        # pylint:disable=too-few-public-methods
        def filter(self, record):
            return record.levelno != out_level

    console_log_level = getattr(logging, get_log_level())
    console_handle = logging.StreamHandler()
    console_handle.setLevel(console_log_level)
    console_handle.addFilter(LevelFilter())
    if debug_mode and not ctx.resilient_parsing:
        formatter = logging.Formatter(fmt="[%(asctime)s][%(levelname)s][%(filename)s L%(lineno)s] %(message)s",
                                      datefmt='%Y-%m-%d,%H:%M:%S')
    else:
        formatter = logging.Formatter(fmt="[%(asctime)s][%(levelname)s] %(message)s",
                                      datefmt='%Y-%m-%d,%H:%M:%S')
    console_handle.setFormatter(formatter)

    # add log level out
    logging.addLevelName(out_level, 'OUT')
    logger = logging.getLogger()
    setattr(logger, 'out', lambda *args: logger.log(out_level, *args))
    output_handle = logging.StreamHandler()
    output_handle.setLevel("OUT")
    output_handle.setFormatter(logging.Formatter("%(message)s"))

    logger.setLevel("DEBUG")
    # Always start from a clean handler list so repeated initialisation does not
    # duplicate output. (The original re-checked `if not logger.handlers` right
    # after clearing it, which made its else branch unreachable.)
    logger.handlers = []
    logger.addHandler(console_handle)
    logger.addHandler(output_handle)
    logger.debug("The logger of analysis have initialized successfully.")
    return logger
index 0000000000..1189675e83 --- /dev/null +++ b/profiler/advisor/utils/tools.py @@ -0,0 +1,76 @@ +from functools import partial + +import click + +CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help']) + + +class ClickAliasedGroup(click.Group): + """ + Alias click command + """ + FORMAT_LIMIT_LEN = 6 + + def __init__(self, *args, **kwargs): + super(ClickAliasedGroup, self).__init__(*args, **kwargs) + self._alias_dict = {} + self._commands = {} + + def command(self, *args, **kwargs): + alias = kwargs.pop('alias', None) + decorator = super(ClickAliasedGroup, self).command(*args, **kwargs) + if not alias: + return decorator + + return partial(self._decorator_warpper, decorator, alias) + + def group(self, *args, **kwargs): + alias = kwargs.pop('alias', None) + decorator = super(ClickAliasedGroup, self).group(*args, **kwargs) + if not alias: + return decorator + + return partial(self._decorator_warpper, decorator, alias) + + def _decorator_warpper(self, decorator, alias, func=None): + cmd = decorator(func) + self._commands[cmd.name] = alias + self._alias_dict[alias] = cmd.name + return cmd + + def resolve_alias(self, cmd_name): + if cmd_name in self._alias_dict.keys(): + return self._alias_dict[cmd_name] + return cmd_name + + def get_command(self, ctx, cmd_name): + cmd_name = self.resolve_alias(cmd_name) + command = super(ClickAliasedGroup, self).get_command(ctx, cmd_name) + return command if command else None + + def format_commands(self, ctx, formatter): + rows = [] + sub_commands = self.list_commands(ctx) + max_len = 0 + if len(sub_commands) > 0: + max_len = max(len(cmd) for cmd in sub_commands) + + limit = formatter.width - self.FORMAT_LIMIT_LEN - max_len + for sub_command in sub_commands: + cmd = self.get_command(ctx, sub_command) + if cmd is None: + continue + if hasattr(cmd, 'hidden') and cmd.hidden: + continue + if sub_command in self._commands: + alias = self._commands[sub_command] + sub_command = f'{sub_command}, {alias}' + if click.__version__[0] 
def singleton(cls):
    """
    Class decorator that creates at most one instance of ``cls``.

    The first call constructs the instance with the given arguments; every later
    call returns that same instance and ignores its arguments.

    :param cls: any class
    :return: singleton handle (a function carrying the class's name and docstring;
        the undecorated class stays reachable through ``__wrapped__``)
    """
    _instance = {}

    # updated=() copies only metadata (__name__, __doc__, ...); the class __dict__
    # is deliberately not merged into the function's attributes.
    @wraps(cls, updated=())
    def _singleton(*args: Any, **kw: Any) -> Any:
        if cls not in _instance:
            _instance[cls] = cls(*args, **kw)
        return _instance.get(cls)

    return _singleton
def get_analyze_processes():
    """
    Return the number of analysis worker processes.

    The value is read from the environment variable named by
    ``const.MA_ADVISOR_ANALYZE_PROCESSES`` (default 1) and capped at
    ``const.MA_ADVISOR_MAX_PROCESSES``. A value that is not an integer falls back
    to 1 with a warning, and the result is never smaller than 1.
    """
    # n_processes not exposed to user through ma-advisor command arguments now
    raw_value = os.getenv(const.MA_ADVISOR_ANALYZE_PROCESSES, 1)
    try:
        n_processes = int(raw_value)
    except (TypeError, ValueError):
        logger.warning("Invalid value %r for %s, fall back to 1 process",
                       raw_value, const.MA_ADVISOR_ANALYZE_PROCESSES)
        n_processes = 1
    return max(1, min(n_processes, const.MA_ADVISOR_MAX_PROCESSES))
def format_timeline_result(result: dict, dump_html=False):
    """
    Group stacks by api name and order them by their count.

    :Param result: mapping of "<api name>:<suffix>" to a mapping of stack text to a sortable value
    :Param dump_html: when True, make stack text safe for html output: escaped
        "\\r\\n" sequences become "<br/>" and "<module>" is entity-escaped
    :Return: mapping of api name to a list of (stack, value) pairs, largest value first
    """
    if dump_html:
        # Work on the serialized form so every nested string is rewritten in one pass.
        # NOTE(review): the two replacement literals were garbled in the reviewed text;
        # "<br/>" and "&lt;module&gt;" are the presumed originals — confirm against upstream.
        serialized = json.dumps(result)
        serialized = serialized.replace("\\r\\n", "<br/>").replace("<module>", "&lt;module&gt;")
        result = json.loads(serialized)

    format_result = {}
    for key, stacks in result.items():
        api_name = key.split(":")[0]
        format_result[api_name] = sorted(stacks.items(), key=lambda stack: stack[1], reverse=True)
    return format_result
def create_directory_for_file(file: str) -> None:
    """
    Create the parent directory of ``file`` if it does not exist yet.

    A path without a directory component (e.g. "report.xlsx") needs no directory,
    so nothing is created for it.
    """
    dirname = os.path.dirname(file)
    # An empty dirname would make os.makedirs raise; exist_ok covers another
    # process creating the directory between the check and the call.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname, exist_ok=True)
def format_excel_title(title: str) -> str:
    """
    Normalize an excel column title: lower-case it, drop the unit markers
    "(us)", "(ns)" and "(%)", and turn spaces into underscores.
    """
    normalized = title.lower()
    for unit_marker in ("(us)", "(ns)", "(%)"):
        normalized = normalized.replace(unit_marker, '')
    return normalized.replace(" ", "_")
def save_downloaded_file(response, url_path, file_save_path):
    """Save the file carried in a response body.

    Args:
        response: response object; ``status_code`` and ``content`` are read
        url_path: url of the file, its base name becomes the local file name
        file_save_path: directory to save the file in, created if missing
    Returns:
        Absolute path of the saved file, or None when the status code was not a
        success and nothing was written.
    """
    # Take the file name from the url and place it under the save directory.
    file_save_path = os.path.normpath(file_save_path)
    file_name = os.path.basename(url_path)
    final_file_path = os.path.join(file_save_path, file_name)
    # Create the target directory when missing; exist_ok tolerates a concurrent creator.
    os.makedirs(file_save_path, exist_ok=True)

    # Only below-300 codes carry the requested content; 300 itself is a redirect-class answer.
    if response.status_code >= 300:
        # Unexpected status code: warn and leave any existing file untouched.
        logger.warning("Failed to save the response body. The response status code is %s. "
                       "Please check the network or file URL", response.status_code)
        return None

    logger.debug("Response status code is %s", response.status_code)
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
    modes = stat.S_IWUSR | stat.S_IRUSR
    # Replace an already existing file with the newest download.
    if os.path.exists(final_file_path):
        os.remove(final_file_path)
    with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f:
        f.write(response.content)
    saved_path = os.path.abspath(final_file_path)
    logger.info("Success to save content in: %s", saved_path)
    return saved_path
def get_package_version(package_name) -> str:
    """
    Look up the installed version of a distribution.

    Args:
        package_name: package name

    Returns:
        version: version info string, "UNKNOWN" when the package is not installed
    """
    if sys.version_info < (3, 8):
        # Before py3.8 the metadata API is only available as the importlib_metadata backport.
        import importlib_metadata as metadata
    else:
        from importlib import metadata

    try:
        return metadata.version(package_name)
    except metadata.PackageNotFoundError:
        return "UNKNOWN"
class OverallSummaryAnalyzer(BaseAnalyzer):
    """Run the overall comparison between two profilings and render it as html.

    :param profiling_path: profiling data to analyse
    :param benchmark_profiling_path: profiling data to compare against; defaults
        to ``profiling_path`` itself when not given
    """

    def __init__(self, profiling_path, benchmark_profiling_path=None, **kwargs):
        self.benchmark_profiling_path = benchmark_profiling_path or profiling_path
        self.profiling_path = profiling_path
        self.html_render = HTMLRender()
        self.result = OptimizeResult()

    def optimize(self):
        """Run the overall comparison, render its time distribution and return the raw result."""
        compare_result = ComparisonInterface(self.benchmark_profiling_path, self.profiling_path).compare(
            Constant.OVERALL_COMPARE)

        # A missing section is handled like an empty one instead of failing on None.get().
        distribution = compare_result.get('Model Profiling Time Distribution') or {}
        headers = distribution.get("headers", [])
        rows = distribution.get("rows", [])

        self.make_record()
        self.make_render(headers=headers, rows=rows)
        return compare_result

    def make_record(self):
        """No record is produced for the overall analysis."""
        pass

    def make_render(self, **kwargs):
        """Render the overall analysis template; skipped when headers or rows are empty."""
        headers = kwargs.get("headers")
        rows = kwargs.get("rows")

        if not headers or not rows:
            logger.info("Empty headers or rows, skip render overall analysis html")
            # Actually skip, as the message says; the original fell through and rendered anyway.
            return
        self.html_render.render_template(key="overall",
                                         template_dir="templates",
                                         template_name="overall_analysis.html",
                                         headers=headers,
                                         rows=rows)
# Source file: profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py
import multiprocessing
import logging
import re

from tqdm import tqdm

from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer
from profiler.advisor.common import constant as const
from profiler.advisor.common.timeline.event import TimelineEvent
from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset
from profiler.advisor.result.result import OptimizeResult
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
from profiler.advisor.utils.utils import format_timeline_result
from profiler.advisor.display.html.render import HTMLRender
from profiler.advisor.utils.utils import init_timeline_ops_db

logger = logging.getLogger()


class TimelineFusionOpsAnalyzer(BaseAnalyzer):
    """
    Scan the operators of a timeline dataset for sequences that match fusion operator
    rules, then collect the call stacks of the matched events so that the affinity api
    which could replace each sequence can be reported.
    """

    def __init__(self, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs):
        self.n_processes = n_processes
        # "<npu_api>:<ops rule>" -> collection of dataset indexes of the matched events
        self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict()
        # "<npu_api>:<ops rule>" -> {call stack: called count}
        self.matched_op_stacks = {}
        self.cann_version = cann_version
        self.torch_version = torch_version
        # stays True until at least one matched event carries a call stack
        self.empty_stacks = True
        self.event_dataset = None
        self.html_render = HTMLRender()
        self.result = OptimizeResult()

    def optimize(self, timeline_dataset, **kwargs):
        """
        :Param timeline_dataset: dataset of timeline event
        :Return: the OptimizeResult filled by make_record
        """
        self.event_dataset = timeline_dataset

        for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]:
            op_api_map = getattr(init_timeline_ops_db(self.cann_version, self.torch_version),
                                 f"_{mode}_op_api_map")
            for op_combined, npu_apis in tqdm(op_api_map.items(), leave=False, ncols=100,
                                              desc="Scanning timeline for affinity apis"):
                for npu_api in npu_apis.split("/"):
                    self.find_fusion_ops(self.event_dataset, op_combined, npu_api, mode)

        self.query_stack(self.event_dataset)

        logger.info("Finish timeline analysis")
        self.make_record()
        self.make_render()
        return self.result

    def find_fusion_ops(self, event_dataset: TimelineEventDataset, ops: str, npu_api: str, mode: str):
        """
        Record the events matching one fusion rule into self._matched_op_index.
        :Param event_dataset: dataset of timeline event
        :Param ops: operator combination with '-' as separator , e.g. permute-reshape
        :Param npu_api: api of torch_npu, generally more efficient than torch api
        :Param mode: aten or dequeue or optimizer
        """
        op_rule_pattern, enable_regex = self._format_rule_to_pattern(ops)
        if not enable_regex:
            self._match_ops(event_dataset, op_rule_pattern, npu_api, mode)
            return

        try:
            self._match_ops_with_regex(event_dataset, op_rule_pattern, npu_api, mode)
        except Exception as e:
            # best effort: a broken rule must not abort the scan of the remaining rules
            logger.warning("Failed to find fusion operators with regex %s, reason is %s", ops, e)

    def _match_ops(self, event_dataset: TimelineEventDataset, ops: str, npu_api: str, mode: str):
        """ match operator based on fusion operators rule(without regex),
            only strictly equals of op name list means matched
        :Param event_dataset: dataset of timeline event
        :Param ops: operator combination with '-' as separator , e.g. permute-reshape
        :Param npu_api: api of torch_npu, generally more efficient than torch api
        :Param mode: aten or dequeue or optimizer
        """
        op_list = ops.split(const.OP_SEP)
        events = getattr(event_dataset, mode)
        # strip the prefix once per event instead of once per candidate window
        event_names = [self._replace_op_name_prefix(event.name, mode) for event in events]

        matched_op_index = set()
        for index, event in enumerate(events):
            # the window starting at this event must equal the rule exactly
            if event_names[index: index + len(op_list)] != op_list:
                continue
            matched_op_index.add(event.dataset_index)

        if matched_op_index:
            self._matched_op_index[npu_api + f":{ops}"] = matched_op_index

    def _match_ops_with_regex(self, event_dataset: TimelineEventDataset, op_rule_pattern: str, npu_api: str,
                              mode: str):
        """ match operator based on fusion operators rule(with regex),
            using regex to support condition like 'a = torch.mul(xxx) if xxx else torch.add(xxx)'
        :Param event_dataset: dataset of timeline event
        :Param op_rule_pattern: fusion operators rule with regex definition , e.g. add-mul{0,10}, add-mul*
        :Param npu_api: api of torch_npu, generally more efficient than torch api
        :Param mode: aten or dequeue or optimizer
        """
        events = getattr(event_dataset, mode)
        matched_op_index = set()
        # every operator name is wrapped by the separator, e.g. "-add--mul--div-"
        total_op_name = "".join([f"{const.OP_SEP}{self._replace_op_name_prefix(event.name, mode)}{const.OP_SEP}"
                                 for event in events])

        matched_pattern_index_tuple = [(x.start(0), x.end(0)) for x in re.finditer(op_rule_pattern, total_op_name)]
        matched_pattern_index_set = set(matched_pattern_index_tuple)
        # convert list of index tuple to a whole list: [(3, 25), ...] -> [3, 25, ...]
        total_ops_split_points = [num for sublist in matched_pattern_index_tuple for num in sublist]

        api_ops_matched = len(total_ops_split_points) != 0

        if 0 not in total_ops_split_points:
            total_ops_split_points = [0] + total_ops_split_points
        if len(total_op_name) not in total_ops_split_points:
            total_ops_split_points.append(len(total_op_name))

        # convert total ops name like "-add-mul-xxx-div-" to small pieces like [["add", "mul"], [...], ["div"]]
        # by the regex index and then calculate the real index for matched fusion operators in event dataset
        op_index = []
        for left, right in zip(total_ops_split_points, total_ops_split_points[1:]):
            matched_op_flag = (left, right) in matched_pattern_index_set
            matched_ops_list = total_op_name[left: right].strip(const.OP_SEP).split(const.OP_SEP + const.OP_SEP)
            op_index.append([matched_op_flag, len(matched_ops_list)])
        for i in range(1, len(op_index)):
            # calculate cumsum for indexing matched operator
            op_index[i][1] = op_index[i][1] + op_index[i - 1][1]
        op_index = [[False, 0]] + op_index

        for i, (matched_op_flag, _) in enumerate(op_index):
            if not matched_op_flag:
                continue
            # number of operators in front of the matched piece == position of its first operator
            index = op_index[i - 1][1]
            if index > len(events) - 1:
                continue
            # Only the index inside the trace file is recorded: the recorded values are compared with
            # trace file indexes in _query_stack_by_matched_index, so the position inside the filtered
            # operator list must not be mixed in (it could select an unrelated event).
            dataset_index = events[index].get("dataset_index")
            if dataset_index is not None:
                matched_op_index.add(dataset_index)

        if api_ops_matched:
            self._matched_op_index[npu_api + f":{op_rule_pattern}"] = sorted(matched_op_index)

    def make_record(self):
        """
        make record for what and how to optimize
        """
        if not self.matched_op_stacks:
            return

        formatted_result = format_timeline_result(self.matched_op_stacks)

        desc = f"Found {len(formatted_result)} apis to be replaced" \
               f" based on the runtime env cann-{self.cann_version} and torch-{self.torch_version}"
        suggestion = "Please replace training api according to sub table 'Affinity training api'"
        if self.empty_stacks:
            desc += ", but with no stack"
            suggestion = const.TIMELINE_EMPTY_STACKS_PROMPT.format(
                timeline_profiling_doc_url=const.TIMELINE_WITH_STACK_DOC_URL
            )

        optimization_item = OptimizeItem(
            const.AFFINITY_TRAINING_API,
            desc,
            [suggestion]
        )

        self.result.add(OptimizeRecord(optimization_item))

        record_title = ["Affinity API", "Code stacks", "Stack called counts"]
        self.result.add_detail(const.AFFINITY_TRAINING_API, headers=record_title)

        for api_name, stacks_info in formatted_result.items():
            if not stacks_info:
                # matched api without any call stack still gets one row
                detail = [api_name, "null", "null"]
                self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail)
                continue
            for stack in stacks_info:
                detail = [api_name, *stack]
                self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail)

    def make_render(self):
        """
        render the matched affinity apis and their stacks to html
        """
        format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True)

        self.html_render.render_template(key="scheduling",
                                         template_dir="templates",
                                         template_name="affinity_api.html",
                                         cann_version=self.cann_version,
                                         torch_version=self.torch_version,
                                         empty_stacks=self.empty_stacks,
                                         with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL,
                                         api_doc_url=const.TIMELINE_API_DOC_URL,
                                         result=format_result_for_html)

    def query_stack(self, event_dataset: TimelineEventDataset):
        """
        Walk through the trace file again and count the call stacks of every matched event,
        the counts are stored in self.matched_op_stacks.
        :Param event_dataset: dataset of timeline event
        """
        if all(len(matched_index) == 0 for matched_index in self._matched_op_index.values()):
            return

        op_stack_list = event_dataset.parse_data_with_generator(self._query_stack_by_matched_index)
        for op_stack in op_stack_list:
            for op_rule, stack in op_stack.items():
                # a rule matched without stack is still registered, with an empty stack table
                stack_counts = self.matched_op_stacks.setdefault(op_rule, {})
                if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG:
                    continue
                stack_counts[stack] = stack_counts.get(stack, 0) + 1

    def _query_stack_by_matched_index(self, index, event):
        """
        Callback of TimelineEventDataset.parse_data_with_generator.
        :Param index: index of the event inside the trace file
        :Param event: raw event
        :Return: dict of op rule -> call stack (or the no-stack flag) for every rule matched at index
        """
        stack_record = {}
        event = TimelineEvent(event)

        matched_op_rules = []
        for op_rule, matched_index in self._matched_op_index.items():
            if index not in matched_index:
                continue

            matched_op_rules.append(op_rule)
            stack = event.args.get(const.CALL_STACKS)

            if not stack:
                logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event)
                continue

            self.empty_stacks = False
            stack_record[op_rule] = stack

        if matched_op_rules and not stack_record:
            for op_rule in matched_op_rules:
                stack_record[op_rule] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG

        return stack_record

    def _replace_op_name_prefix(self, event_name, mode):
        """
        :Param event_name: name of timeline event
        :Param mode: aten or dequeue or optimizer
        :Return: event name without the prefix which belongs to the mode
        """
        if mode == const.DEQUEUE.lower():
            op_name_prefix = f"{const.DEQUEUE}{const.DEQUEUE_SEP}"
        elif mode == const.ATEN.lower():
            # callers pass the lower-cased mode, compare in the same way as for dequeue
            op_name_prefix = f"{const.ATEN}{const.ATEN_SEP}"
        else:
            op_name_prefix = f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"

        return event_name.replace(op_name_prefix, "")

    def _format_rule_to_pattern(self, op_rule):
        """
        Args:
            op_rule: like (mul){0,1}-(add|neg){0,2}-dropout-(softmax)*

        Returns: op_pattern like (-mul-){0,1}(-add-|-neg-){0,2}(-dropout-)(-softmax-)*
            and a flag telling whether the rule requires regex matching
        """
        if "(" not in op_rule and ")" not in op_rule:
            # op_rule which requires fuzzy matching must consist of "()"
            return op_rule, False

        formatted_pieces = []
        for op_pattern in op_rule.split(const.OP_SEP):
            matched_res = re.search(r'\((.*?)\)', op_pattern)

            # range of the operator names: inside the brackets, or the whole piece without brackets
            ops_index_range = (matched_res.start() + 1, matched_res.end() - 1) if matched_res else (
                0, len(op_pattern))

            op_names = op_pattern[ops_index_range[0]: ops_index_range[1]]
            wrapped_op_names = [f"{const.OP_SEP}{op_name.strip(' ')}{const.OP_SEP}"
                                for op_name in op_names.split("|")]
            # whatever follows the closing bracket is the quantifier, e.g. {0,2} or *
            op_suffix = op_pattern[ops_index_range[1] + 1:]
            formatted_pieces.append(f"({'|'.join(wrapped_op_names)}){op_suffix}")

        return "".join(formatted_pieces), True
# Source file: profiler/advisor/dataset/timeline_event_dataset.py
import logging
from typing import List

import ijson
from tqdm import tqdm

from profiler.advisor.common import constant as const
from profiler.advisor.common.timeline.event import TimelineEvent
from profiler.advisor.utils.utils import get_file_path_from_directory
from profiler.advisor.utils.utils import singleton

logger = logging.getLogger()


@singleton
class TimelineEventDataset:
    """
    Dataset built from the single trace_view.json found under root_dir, the events are
    sorted into several containers depending on the analysis mode
    ("fusion_ops", "op_stack", anything else collects for both).
    """

    def __init__(self, root_dir, **kwargs) -> None:
        self._ops_with_task_type = {}
        self._ops_with_stack = {}
        self._torch_to_npu = {}
        self._acl_to_npu = set()
        self._aten: List[TimelineEvent] = []
        self._optimizer: List[TimelineEvent] = []
        # always defined, otherwise the task_op_names property raises AttributeError
        # for every analysis mode other than "op_stack" and "all"
        self._task_op_names: List[str] = []
        self.timeline_dir = root_dir
        self.timeline_data_list = get_file_path_from_directory(root_dir, lambda file: file.endswith("trace_view.json"))
        self.dataset_len = None
        self.analysis_mode = kwargs.get("analysis_mode")
        self.task_type = kwargs.get("task_type")
        self.cann_version = kwargs.get("cann_version")
        self.torch_version = kwargs.get("torch_version")

        if self.analysis_mode in ["fusion_ops", "all"]:
            logger.info("Load fusion operators database for cann version '%s' and torch version '%s'",
                        self.cann_version, self.torch_version)

        self.parse()

        if self.analysis_mode in ["op_stack", "all"]:
            # keys look like "<name>-<ts>", keep the unique part in front of the first "-"
            self._task_op_names = list({event_key.split("-")[0] for event_key in self._ops_with_task_type})

        self._post_process()

    @property
    def ops_with_stack(self):
        return self._ops_with_stack

    @property
    def torch_to_npu(self):
        return self._torch_to_npu

    @property
    def acl_to_npu(self):
        return self._acl_to_npu

    @property
    def ops_with_task_type(self):
        return self._ops_with_task_type

    @property
    def task_op_names(self):
        return self._task_op_names

    @property
    def optimizer(self):
        return self._optimizer

    @property
    def aten(self):
        return self._aten

    @classmethod
    def get_key(cls):
        """
        get key of dataset
        :return: key
        """
        return cls.__module__.rsplit('.', maxsplit=1)[-1]

    def parse(self):
        """
        Load the events of the trace file into the dataset.
        :return: True if exactly one trace_view.json was found and parsed, otherwise False
        """
        if len(self.timeline_data_list) == 0:
            logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir)
            return False

        if len(self.timeline_data_list) > 1:
            logger.warning("Please ensure only one trace_view.json in %s, skip timeline analysis.", self.timeline_dir)
            return False

        result = self.parse_data_with_generator(self._add_event)

        if not self.dataset_len:
            # remembered so that later scans can show a progress bar with a total
            self.dataset_len = len(result)

        return True

    def parse_data_with_generator(self, func):
        """
        Stream the events of the trace file and apply func to each of them.
        :param func: callable invoked as func(index=<position in file>, event=<raw event>)
        :return: list of the results of func which are not None,
                 parsing stops at the first error and the results collected so far are returned
        """
        result = []
        if not self.timeline_data_list:
            # nothing to read; also keeps the error handler below from indexing an empty list
            logger.debug("No trace_view.json found in %s, nothing to parse", self.timeline_dir)
            return result

        timeline_file = self.timeline_data_list[0]
        try:
            with open(timeline_file, "r") as f:
                for i, event in tqdm(enumerate(ijson.items(f, "item")),
                                     leave=False, ncols=100, desc="Building dataset for timeline analysis",
                                     total=self.dataset_len):
                    func_res = func(index=i, event=event)
                    if func_res is not None:
                        result.append(func_res)
        except Exception as e:
            logger.warning("Error %s while parsing file %s, continue to timeline analysis", e,
                           timeline_file)
        return result

    def _add_ops_with_task_type(self, event):
        key = f"{event.name}-{event.ts}"
        self._ops_with_task_type[key] = TimelineEvent(
            {
                const.TASK_TYPE: event.args.get(const.TASK_TYPE),
                "task_id": event.args.get("Task Id"),
                "tid": event.tid,
                "name": event.name,
                "ts": str(event.ts)
            }
        )

    def _add_ops_with_stack(self, event):
        self._ops_with_stack[str(event.ts)] = TimelineEvent({"name": event.name, "dataset_index": event.dataset_index})

    def _add_torch_to_npu(self, event):
        key = f"{event.ph}-{event.id}"
        self._torch_to_npu[key] = TimelineEvent({"tid": event.tid, "ts": str(event.ts)})

    def _add_acl_to_npu(self, event):
        # op with task type equals to ai_cpu which derived from acl_to_npu do not have stacks
        self._acl_to_npu.add(str(event.ts))

    def _add_optimizer(self, event: TimelineEvent):
        self._optimizer.append(TimelineEvent({"name": event.name, "dataset_index": event.dataset_index}))

    def _add_aten(self, event: TimelineEvent):
        self._aten.append(TimelineEvent({
            "name": event.name, "dataset_index": event.dataset_index, "ts": event.ts, "dur": event.dur
        }))

    def _add_event(self, index, event):
        """
        Callback of parse_data_with_generator: store one event in the containers of the analysis mode.
        :param index: position of the event inside the trace file, kept as "dataset_index"
        :param event: raw event or TimelineEvent
        :return: True, so that the number of parsed events can be counted
        """
        event["dataset_index"] = index
        if not isinstance(event, TimelineEvent):
            event = TimelineEvent(event)

        if self.analysis_mode == "fusion_ops":
            self._add_event_for_fusion_ops(event)
        elif self.analysis_mode == "op_stack":
            self._add_event_for_op_stack(event)
        else:
            self._add_event_for_fusion_ops(event)
            self._add_event_for_op_stack(event)
        return True

    def _add_event_for_fusion_ops(self, event):
        aten_prefixes = (f"{const.ATEN}{const.ATEN_SEP}", f"{const.NPU}{const.ATEN_SEP}")
        if event.name.lower().startswith(aten_prefixes):
            self._add_aten(event)
            return

        if event.name.startswith(f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}"):
            self._add_optimizer(event)
            return

    def _add_event_for_op_stack(self, event):
        # an event goes into the first container whose condition it satisfies
        if event.name.lower() == const.TORCH_TO_NPU:
            self._add_torch_to_npu(event)
            return

        if event.args.get(const.CALL_STACKS):
            self._add_ops_with_stack(event)
            return

        if event.args.get(const.TASK_TYPE) and event.args.get(const.TASK_TYPE) in [const.AI_CORE, const.AI_CPU]:
            self._add_ops_with_task_type(event)
            return

        if event.name and event.ts and event.name == const.ACL_TO_NPU:
            self._add_acl_to_npu(event)
            return

    def _post_process(self):
        # eliminate sub aten operator of the first level aten operator by 'ts' and 'dur',
        # keep the first level aten operator contiguous
        formated_atens = []
        for aten_event in sorted(self._aten, key=lambda x: x.get("ts", -1)):
            if not formated_atens or not formated_atens[-1].ts_include(aten_event):
                formated_atens.append(aten_event)
        self._aten = formated_atens
constant +from profiler.advisor.utils.utils import debug_option +from profiler.advisor.common.module_lib import AnalysisScope +from profiler.advisor.interface.interface import Interface + +logger = logging.getLogger() + + +def _analyze(dimensions, **kwargs): + is_inference = kwargs.get("is_inference", False) + user_input_mode = kwargs.get("mode") + result_list = [] + job_list = [] + + for dimension in dimensions: + + valid_modes = AnalysisScope.analyzer_list(dimension, is_inference) + if not valid_modes: + logger.info("Skip analysis of dimension %s, no analyzer", dimension) + continue + if user_input_mode and user_input_mode not in valid_modes: + logger.error("Got error mode %s for analysis dimension %s, optionals are %s", user_input_mode, dimension, + valid_modes) + continue + + analysis_modes = [user_input_mode] if user_input_mode else valid_modes + + for mode in analysis_modes: + interface = Interface(**kwargs) + job_list.append((dimension, mode, interface)) + + for i, (dimension, mode, interface) in enumerate(job_list[::-1]): + result_list.append( + interface.get_result(dimension, mode, render_html=i == len(job_list) - 1, is_inference=is_inference)) + + for result in result_list[::-1]: + if result and hasattr(result, "show"): + result.show() + break + + +@click.group(name="analyze", cls=ClickAliasedGroup) +def analyze_cli(**kwargs): + """Analyze profiling datasets and give performance optimization suggestion.""" + pass + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="all", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='path of trace_view.json in profiling') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + 
help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option('--is_inference', is_flag=True) +@debug_option +def analyze_all(**kwargs) -> None: + # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 + if not kwargs.get("benchmark_profiling_path"): + kwargs["benchmark_profiling_path"] = kwargs.get("profiling_path") + + _analyze(AnalysisScope.supported_dims, **kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="communication", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='path of trace_view.json in profiling') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option('--mode', '-m', 'mode', default=None) +@click.option('--is_inference', is_flag=True) +@debug_option +def analyze_communication(**kwargs) -> None: + _analyze(["communication"], 
**kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="scheduling", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='path of trace_view.json in profiling') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option('--mode', '-m', 'mode', default=None) +@click.option('--is_inference', is_flag=True) +@debug_option +def analyze_scheduling(**kwargs) -> None: + _analyze(["scheduling"], **kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + name="computing", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='path of trace_view.json in profiling') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 
'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option('--mode', '-m', 'mode', default=None) +@click.option('--is_inference', is_flag=True) +@debug_option +def analyze_computing(**kwargs) -> None: + _analyze(["computing"], **kwargs) diff --git a/profiler/cli/cluster_cli.py b/profiler/cli/cluster_cli.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py new file mode 100644 index 0000000000..a4e69653f2 --- /dev/null +++ b/profiler/cli/compare_cli.py @@ -0,0 +1,38 @@ +import ast +import click +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup +from profiler.advisor.utils.utils import debug_option +from profiler.advisor.common.timeline.event import AdvisorDict +from profiler.compare_tools.compare_backend.comparison_generator import ComparisonGenerator + + +@click.group(name="compare", cls=ClickAliasedGroup) +def compare_cli(**kwargs): + """Query operator details from timeline.""" + pass + + +@compare_cli.command(context_settings=CONTEXT_SETTINGS, + name="profiling", + short_help='Analyze timeline for specific operator and report detail code stacks.') +@click.option('--profiling_path', '-d', 'base_profiling_path', type=click.Path(), required=True, + help='path of trace_view.json in profiling') +@click.option('--benchmark_profiling_path', '-bp', 'comparison_profiling_path', type=click.Path()) +@click.option('--enable_profiling_compare', is_flag=True) +@click.option('--enable_operator_compare', is_flag=True) +@click.option('--enable_memory_compare', is_flag=True) +@click.option('--enable_communication_compare', is_flag=True) +@click.option('--output_path', '-o', 'output_path', type=click.Path()) 
+@click.option('--max_kernel_num', 'max_kernel_num', type=int, help="每个torch op的kernel数量限制") +@click.option('--op_name_map', type=dict, default={}, help="配置GPU与NPU等价的算子名称映射关系,以字典的形式传入", required=False) +@click.option('--use_input_shape', is_flag=True) +@click.option('--gpu_flow_cat', type=str, default='', help="gpu flow event的分类标识") +@debug_option +def compare_profiling(**kwargs) -> None: + args = AdvisorDict(kwargs) + ComparisonGenerator(args).run() diff --git a/profiler/cli/complete_cli.py b/profiler/cli/complete_cli.py new file mode 100644 index 0000000000..e4fa0caf3f --- /dev/null +++ b/profiler/cli/complete_cli.py @@ -0,0 +1,29 @@ +import click + +from profiler.advisor.utils.tools import CONTEXT_SETTINGS + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help='Auto complete ma-advisor command in terminal, support "bash(default)/zsh/fish".') +@click.argument('shell_type', nargs=1, default="Bash", type=click.Choice(["Bash", "Zsh", "Fish"], case_sensitive=False)) +def auto_complete_cli(shell_type): + """ + Auto complete ma-advisor command in terminal. 
+ + Example: + + \b + # print bash auto complete command to terminal + ma-advisor auto-completion Bash + """ + click.echo("Tips: please paste following shell command to your terminal to activate auto completion.\n") + if shell_type.lower() == "bash": + bash_str = 'eval "$(_advisor_COMPLETE=bash_source ma-advisor)"' + elif shell_type.lower() == "zsh": + bash_str = 'eval "$(_advisor_COMPLETE=zsh_source ma-advisor)"' + elif shell_type.lower() == "fish": + bash_str = 'eval (env _advisor_COMPLETE=fish_source ma-advisor)' + else: + click.echo(f'Unsupported shell type {shell_type}.') + return + click.echo(f'{bash_str}\n') diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py new file mode 100644 index 0000000000..b14d3dfd86 --- /dev/null +++ b/profiler/cli/entrance.py @@ -0,0 +1,59 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +import logging +import click + +from profiler.cli.analyze_cli import analyze_cli +from profiler.cli.complete_cli import auto_complete_cli +from profiler.cli.compare_cli import compare_cli + +from profiler.advisor.version import print_version_callback, cli_version + +logger = logging.getLogger() +CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], + max_content_width=160) + +COMMAND_PRIORITY = { + "analyze": 1, + "query": 2, + "env": 3, + "auto-completion": 4 +} + + +class SpecialHelpOrder(click.Group): + + def __init__(self, *args, **kwargs): + super(SpecialHelpOrder, self).__init__(*args, **kwargs) + + def list_commands_for_help(self, ctx): + """ + reorder the list of commands when listing the help + """ + commands = super(SpecialHelpOrder, self).list_commands(ctx) + return [item[1] for item in sorted((COMMAND_PRIORITY.get(command, float('INF')), + command) for command in commands)] + + def get_help(self, ctx): + self.list_commands = self.list_commands_for_help + return super(SpecialHelpOrder, self).get_help(ctx) + + +@click.group(context_settings=CONTEXT_SETTINGS, cls=SpecialHelpOrder) +@click.option('--version', '-V', 
'-v', is_flag=True, + callback=print_version_callback, expose_value=False, + is_eager=True, help=cli_version()) +def advisor_cli(**kwargs): + pass + + +advisor_cli.add_command(analyze_cli, name="analyze") +advisor_cli.add_command(auto_complete_cli, name="auto-completion") +advisor_cli.add_command(compare_cli, name="compare") + +if __name__ == '__main__': + advisor_cli.main( + ["analyze", "scheduling", "-d", + r"/home/ma-user/work/profiling", + ] + ) diff --git a/profiler/cli/query_cli.py b/profiler/cli/query_cli.py new file mode 100644 index 0000000000..e69de29bb2 -- Gitee From 21c339e6278b6faca5640d301a36aa5aee5c930f Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Tue, 9 Apr 2024 19:34:51 +0800 Subject: [PATCH 04/21] =?UTF-8?q?profiler=E6=89=93=E5=8C=85=E6=88=90att=5F?= =?UTF-8?q?advisor-xxx.whl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MANIFEST.in | 5 +++++ requirements.txt | 2 ++ requirements/build.txt | 12 ++++++++++++ requirements/test.txt | 5 +++++ setup.cfg | 32 ++++++++++++++++++++++++++++++++ setup.py | 42 ++++++++++++++++++++++++++++++++++++++++++ version.txt | 1 + 7 files changed, 99 insertions(+) create mode 100644 MANIFEST.in create mode 100644 requirements.txt create mode 100644 requirements/build.txt create mode 100644 requirements/test.txt create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 version.txt diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000..d86534656d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +recursive-include profiler/advisor/display * +recursive-include profiler/advisor/third_party/simulation/display * +recursive-include profiler/advisor/checker * +global-exclude */__pycache__/* +global-exclude *.pyc diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..9d7eaf19f7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +-r requirements/build.txt +-r requirements/tests.txt \ No newline at end of 
file diff --git a/requirements/build.txt b/requirements/build.txt new file mode 100644 index 0000000000..2c5638eaef --- /dev/null +++ b/requirements/build.txt @@ -0,0 +1,12 @@ +click +tabulate +networkx +jinja2 +PyYaml +tqdm +prettytable +ijson +requests +xlsxwriter +sqlalchemy +urllib3<2.0 \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000000..3bacb7ca55 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,5 @@ +pytest==6.2.4 +pytest-cov==2.12.0 +pytest-mock==3.6.1 +pytest-cookies==0.6.1 +mock==4.0.3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000..cf9acbbc4f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,32 @@ +[isort] +line_length = 120 +multi_line_output = 0 +known_standard_library = setuptools +no_lines_before = STDLIB,LOCALFOLDER +default_section = THIRDPARTY +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true + +[flake8] +exclude = tests/* +max-line-length = 120 + +[pycodestyle] +max-line-length = 120 +exclude = tests/* + +[yapf] +BASED_ON_STYLE = pep8 +BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true +SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true +COLUMN_LIMIT = 120 + +[aliases] +test=pytest + +[mypy] +ignore_missing_imports = True + +[mypy-tests.*] +ignore_errors = True diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000..8ee18763b5 --- /dev/null +++ b/setup.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +from setuptools import find_packages, setup # type: ignore + + +extras = { + "test": [ + "pytest==6.2.4", + "pytest-cookies==0.6.1", + "pytest-cov==2.12.0", + "mock==4.0.3", + ] +} + +with open('requirements/build.txt', 'r') as f: + requires = f.read().splitlines() + +with open('requirements/test.txt', 'r') as f: + tests_requires = f.read().splitlines() +tests_requires.extend(set(requires)) + +with open('version.txt', 'r') as f: + version = f.read().strip() + +setup( + 
name="att-advisor", + version=version, + description="Ascend advisor tools", + packages=find_packages(), + include_package_data=True, + python_requires='>=3.7', + install_requires=requires, + package_data={'': ['*.json', '*.ini', '*.txt', '*.yaml', '*.html']}, + tests_require=tests_requires, + entry_points=""" + [console_scripts] + att-advisor=profiler.cli.entrance:advisor_cli + """ +) + +# build cmd: pip install --editable . diff --git a/version.txt b/version.txt new file mode 100644 index 0000000000..7bcd0e3612 --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +0.0.2 \ No newline at end of file -- Gitee From 808e677a8de8f64c29593395257ead0dd2325d36 Mon Sep 17 00:00:00 2001 From: fanxiaotong Date: Tue, 16 Apr 2024 20:37:29 +0800 Subject: [PATCH 05/21] framework --- profiler/advisor/analyzer/base_analyzer.py | 18 +++ .../analyzer/cluster/slow_rank_analyser.py | 69 ++++++++ .../fusion_ops/fusion_ops_analyzer.py | 20 +-- profiler/advisor/common/module_lib.py | 87 ---------- .../dataset/cluster/cluster_dataset.py | 152 ++++++++++++++++++ .../cluster/cluster_step_trace_time_bean.py | 67 ++++++++ profiler/advisor/interface/interface.py | 83 ++++------ profiler/cli/analyze_cli.py | 25 +-- profiler/cluster_analyse/cluster_analysis.py | 4 +- 9 files changed, 351 insertions(+), 174 deletions(-) create mode 100644 profiler/advisor/analyzer/cluster/slow_rank_analyser.py delete mode 100644 profiler/advisor/common/module_lib.py create mode 100644 profiler/advisor/dataset/cluster/cluster_dataset.py create mode 100644 profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index f698865266..ff945da5cf 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -1,7 +1,17 @@ from abc import abstractmethod, ABCMeta +from profiler.advisor.display.html.render import HTMLRender +dataset_cls_list = [] class 
BaseAnalyzer(metaclass=ABCMeta): + def __init__(self, collection_path, dataset_cls_list, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + self.n_processes = n_processes + self.cann_version = cann_version + self.torch_version = torch_version + self.html_render = HTMLRender() + self.collection_path = collection_path + self.kwargs = kwargs + self.event_dataset_list = self.get_dataset_dict(dataset_cls_list) @abstractmethod def optimize(self): @@ -14,3 +24,11 @@ class BaseAnalyzer(metaclass=ABCMeta): @abstractmethod def make_render(self): pass + + def get_dataset_dict(self, dataset_cls_list): + datasets = {key: [] for key in dataset_cls_list} + + for dataset_cls in dataset_cls_list: + if dataset_cls and callable(dataset_cls): + datasets[dataset_cls] = dataset_cls(self.collection_path, **self.kwargs) + return datasets diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py new file mode 100644 index 0000000000..36a30d4d98 --- /dev/null +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -0,0 +1,69 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections import defaultdict +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet + + +class SlowRankAnalyzer(BaseAnalyzer): + RANK = "rank" + RATIO_THRESHOLD = 0.05 + BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] + dataset_cls_list = [ClusterStepTraceTimeDataSet] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) + self.step_trace_timeData = self.event_dataset_list[ClusterStepTraceTimeDataSet] + + def optimize(self, **kwargs): + step_dict = self.process() + self.output(step_dict) + return self.output_format_data + + def process(self): + step_dict = defaultdict(lambda: [0, 0, 0, 0]) + for step_bean in self.step_trace_timeData: + if step_bean.type == self.RANK: + step_dict[step_bean.index][0] += step_bean.compute + step_dict[step_bean.index][1] += step_bean.communication + step_dict[step_bean.index][2] += step_bean.free + total_time_list = [sum(data_tuple) for rank_id, data_tuple in step_dict.items()] + if total_time_list: + mean_total_time = sum(total_time_list) / len(total_time_list) + for i in range(len(self.BOTTLENECK_LIST)): + self.produce_bottleneck(step_dict, i, mean_total_time) + return step_dict + + def produce_bottleneck(self, step_dict: dict, produce_type: int, mean_total_time: float): + data_list = [data_tuple[produce_type] for rank_id, data_tuple in step_dict.items()] + max_ratio = self.compute_max_gap_ratio(data_list, mean_total_time) + if max_ratio > self.RATIO_THRESHOLD: + self.bottelneck += f'{self.BOTTLENECK_LIST[produce_type]} has some issues in the cluster, ' \ + f'because the max difference of {self.BOTTLENECK_LIST[produce_type]} time ' \ + f'has reached {round(max_ratio * mean_total_time / 1000, 3)}ms. 
\n' + + def output(self, step_dict: dict): + self.output_format_data[self.DATA] = step_dict + self.output_format_data[self.BOTTLENECK] = self.bottelneck + + def make_record(self): + """ + make record for what and how to optimize + """ + pass + + def make_render(self): + pass diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py index 3db3ae55a1..ca10dcb1f5 100644 --- a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py +++ b/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py @@ -11,38 +11,32 @@ from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.utils.utils import format_timeline_result -from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.utils.utils import init_timeline_ops_db logger = logging.getLogger() class TimelineFusionOpsAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] - def __init__(self, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - self.n_processes = n_processes + def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() self.matched_op_stacks = {} - self.cann_version = cann_version - self.torch_version = torch_version self.empty_stacks = True - self.event_dataset = None - self.html_render = HTMLRender() self.result = OptimizeResult() + self.timeline_event_dataset = self.event_dataset_list[TimelineEventDataset] - def optimize(self, timeline_dataset, **kwargs): - - self.event_dataset = timeline_dataset - + def 
optimize(self, **kwargs): for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]: for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version), f"_{mode}_op_api_map").items(), leave=False, ncols=100, desc="Scanning timeline for affinity apis"): for npu_api in npu_apis.split("/"): - self.find_fusion_ops(self.event_dataset, op_combined, npu_api, mode) + self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode) - self.query_stack(self.event_dataset) + self.query_stack(self.timeline_event_dataset) logger.info("Finish timeline analysis") self.make_record() diff --git a/profiler/advisor/common/module_lib.py b/profiler/advisor/common/module_lib.py deleted file mode 100644 index 697e37f736..0000000000 --- a/profiler/advisor/common/module_lib.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging - -from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer -from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer - -from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset - -logger = logging.getLogger() - - -class AnalysisScope: - supported_dims = ["computing", "scheduling", "communication", "overall", "dataloader"] - - @staticmethod - def get_analyzer(dimension, analyzer_name, is_inference=False): - if is_inference: - return getattr(InferenceAnalysisScope, dimension)().get(analyzer_name) - return getattr(TrainAnalysisScope, dimension)().get(analyzer_name) - - @staticmethod - def analyzer_list(dim=None, is_inference=False): - analyzer_list = [] - dims = [dim] if dim else AnalysisScope.supported_dims - for dim in dims: - analyzer_list += list(getattr(InferenceAnalysisScope, dim)().keys()) if is_inference else list( - getattr(TrainAnalysisScope, dim)().keys()) - return analyzer_list - - -class TrainAnalysisScope(AnalysisScope): - - @staticmethod - def computing(): - return dict() - - @staticmethod - def scheduling(): - 
return dict( - timeline_fusion_ops=TimelineFusionOpsAnalyzer - ) - - @staticmethod - def communication(): - return dict() - - @staticmethod - def overall(): - return dict( - overall_summary=OverallSummaryAnalyzer - ) - - @staticmethod - def dataloader(): - return dict() - - -class InferenceAnalysisScope(AnalysisScope): - @staticmethod - def computing(): - return dict() - - @staticmethod - def scheduling(): - return dict() - - @staticmethod - def communication(): - return dict() - - @staticmethod - def overall(): - return dict() - - @staticmethod - def dataloader(): - return dict() - - -class AnalyzerToDataset: - analyzer_to_dataset = { - "overall_summary": [], - "timeline_fusion_ops": [TimelineEventDataset] - } - - @staticmethod - def get_dataset(analyzer_name): - return AnalyzerToDataset.analyzer_to_dataset.get(analyzer_name) diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py new file mode 100644 index 0000000000..40579e858c --- /dev/null +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -0,0 +1,152 @@ +import logging + +import os + +import profiler.advisor.dataset.cluster.cluster_step_trace_time_bean +from profiler.advisor.utils.utils import singleton +from profiler.cluster_analyse.common_func.file_manager import FileManager +from profiler.cluster_analyse.common_func.constant import Constant +from collections import defaultdict +from profiler.cluster_analyse.cluster_analysis import ClusterAnalysis +from profiler.advisor.dataset.cluster.cluster_step_trace_time_bean import ClusterStepTraceTimeBean + +logger = logging.getLogger() + + +class ClusterDataset: + + def __init__(self, collection_path, **kwargs) -> None: + self.collection_path = os.path.realpath(collection_path) + if not self.is_cluster_analysis_output_exist(): + self.cluster_analyze() + + def is_cluster_analysis_output_exist(self): + """ + check whether input path is valid + """ + for file in os.listdir(self.collection_path): + 
if file == 'cluster_analysis_output': + print("[INFO]Cluster has been analyzed " + "because of the existence of cluster analysis output directory.") + print("[INFO]Skip Cluster analyze backend.") + return True + return False + + def cluster_analyze(self): + parameter = { + Constant.COLLECTION_PATH: self.collection_path, + Constant.ANALYSIS_MODE: "all" + } + print("[INFO] cluster analysis is in the process, please wait...") + try: + ClusterAnalysis(parameter).run() + except Exception as e: + raise ValueError(f"Cluster analyze backend failed:{e}") from e + + def load_csv_data(self, file_name, dataBean): + csv_path = os.path.join(self.collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT, file_name) + if not os.path.exists(csv_path): + msg = "[ERROR] cluster_step_trace_time.csv doesn't exist, terminate analysis." + raise RuntimeError(msg) + data = FileManager.read_csv_file(csv_path, dataBean) + return data + + def load_json_data(self, file_name): + json_path = os.path.join(self.collection_path, Constant.CLUSTER_ANALYSIS_OUTPUT, file_name) + if not os.path.exists(json_path): + msg = "[ERROR] cluster_communication.json doesn't exist, terminate analysis." 
+ raise RuntimeError(msg) + data = FileManager.read_json_file(json_path) + return data + + +@singleton +class ClusterStepTraceTimeDataSet(ClusterDataset): + RANK = "rank" + + def __init__(self, collection_path: str, kwargs: dict = None): + super().__init__(collection_path) + self._step_dict = defaultdict() + + def parse(self): + self.path_check() + step_data = self.load_csv_data(Constant.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) + self.step_dict = self.formate_data(step_data) + + def formate_data(self, step_data: list): + step_dict = defaultdict(lambda: [0, 0, 0, 0]) + for step_bean in step_data: + if step_bean.type == self.RANK: + step_dict[step_bean.index][0] += step_bean.compute + step_dict[step_bean.index][1] += step_bean.communication + step_dict[step_bean.index][2] += step_bean.free + return step_dict + + def get_data(self): + return self._step_dict + + +@singleton +class ClusterCommunicationDataSet(ClusterDataset): + RDMA_TIME_MS = "RDMA time(ms)" + RDMA_SIZE_MB = "RDMA size(mb)" + SDMA_TIME_MS = "SDMA time(ms)" + SDMA_SIZE_MB = "SDMA size(mb)" + RDMA_BANDWIDTH = "RDMA bandwidth(GB/s)" + SDMA_BANDWIDTH = "SDMA bandwidth(GB/s)" + COMMUNICATION_BANDWIDTH_INFO = "Communication Bandwidth Info" + TRANSIT_TIME = "Transit Time(ms)" + TRANSIT_SIZE = "Transit Size(MB)" + SDMA = "SDMA" + RDMA = "RDMA" + + def __init__(self, collection_path: str, kwargs: dict = None): + super().__init__(collection_path) + self.rank_bw_dict = defaultdict(lambda: { + self.RDMA_TIME_MS: 0, + self.RDMA_SIZE_MB: 0, + self.SDMA_TIME_MS: 0, + self.SDMA_SIZE_MB: 0, + }) + + @staticmethod + def compute_ratio(dividend: float, divisor: float): + if abs(divisor) < 1e-15: + return 0 + else: + return round(dividend / divisor, 4) + + def parse(self): + self.path_check() + communication_json = self.load_json_data() + self.process(communication_json) + + def process(self, communication_json: dict): + for comm_group, group_dict in communication_json.items(): + for step, step_dict in 
group_dict.items(): + for op, op_dict in step_dict.items(): + self.compute_bandwidth(op_dict) + + def compute_bandwidth(self, op_dict: dict): + for rank_id, rank_dict in op_dict.items(): + try: + rank = int(rank_id) + except ValueError as e: + msg = "[ERROR] Cluster_communication.json has invalid structure." + raise ValueError(msg) from e + for comm_type, bw_dict in rank_dict.get(self.COMMUNICATION_BANDWIDTH_INFO, {}).items(): + if comm_type == self.SDMA: + self.rank_bw_dict[rank][self.SDMA_SIZE_MB] += bw_dict.get(self.TRANSIT_SIZE) + self.rank_bw_dict[rank][self.SDMA_TIME_MS] += bw_dict.get(self.TRANSIT_TIME) + if comm_type == self.RDMA: + self.rank_bw_dict[rank][self.RDMA_SIZE_MB] += bw_dict.get(self.TRANSIT_SIZE) + self.rank_bw_dict[rank][self.RDMA_TIME_MS] += bw_dict.get(self.TRANSIT_TIME) + + for rank, rank_dict in self.rank_bw_dict.items(): + self.rank_bw_dict[rank][self.RDMA_BANDWIDTH] = self.compute_ratio( + self.rank_bw_dict[rank][self.RDMA_SIZE_MB], self.rank_bw_dict[rank][self.RDMA_TIME_MS]) + self.rank_bw_dict[rank][self.SDMA_BANDWIDTH] = self.compute_ratio( + self.rank_bw_dict[rank][self.SDMA_SIZE_MB], self.rank_bw_dict[rank][self.SDMA_TIME_MS]) + + def get_data(self): + return self.rank_bw_dict diff --git a/profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py b/profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py new file mode 100644 index 0000000000..b108fc77a3 --- /dev/null +++ b/profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class ClusterStepTraceTimeBean: + STEP = "Step" + TYPE = "Type" + INDEX = "Index" + COMPUTING = "Computing" + COMMUNICATION = "Communication(Not Overlapped)" + FREE = "Free" + + def __init__(self, data: dict): + self._data = data + + @property + def step(self) -> str: + return self._data.get(self.STEP, '') + + @property + def type(self) -> str: + return self._data.get(self.TYPE, '') + + @property + def index(self) -> int: + try: + return int(self._data.get(self.INDEX)) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Index'." + raise ValueError(msg) from e + + @property + def compute(self) -> float: + try: + return float(self._data.get(self.COMPUTING, '')) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Computing'." + raise ValueError(msg) from e + + @property + def communication(self) -> float: + try: + return float(self._data.get(self.COMMUNICATION, '')) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Communication'." + raise ValueError(msg) from e + + @property + def free(self) -> float: + try: + return float(self._data.get(self.FREE, '')) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Free'." 
+ raise ValueError(msg) from e + diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 4a2eae244a..54e7dcdc3c 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,66 +1,47 @@ import os -from profiler.advisor.common.module_lib import AnalysisScope, AnalyzerToDataset +from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer +from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset from profiler.advisor.utils.utils import Timer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer class Interface: + supported_analysiser = { + "computing": [], + "scheduling": [TimelineFusionOpsAnalyzer], + "communication": [], + "overall": [], + "dataloader": [], + "cluster": [SlowRankAnalyzer] + } + + all_dimension = supported_analysiser.keys() + def __init__(self, **kwargs): self.collection_path = os.path.realpath(kwargs.get("profiling_path")) - self._analyzer_controller = AnalyzerController(**kwargs) - self._dataset_controller = DatasetController(collection_path=self.collection_path, **kwargs) - def get_result(self: any, dimension: str, mode: str=None, render_html=False, **kwargs): + @staticmethod + def get_analyzer(dimension, is_inference=False): + return Interface.supported_analysiser.get(dimension, []) + + def get_result(self: any, dimension: str, render_html=False, **kwargs): """ :Param mode: affinity apis, ai cpu and so on. 
""" - analyzer = self._analyzer_controller.create_analyzer(dimension, mode, kwargs.get("is_inference", False)) - - datasets = self._dataset_controller.create_dataset(mode) - if not analyzer: - return - - if datasets: - result = analyzer.optimize(*datasets) - else: - result = analyzer.optimize() - - if render_html: - if hasattr(analyzer, "html_render"): - analyzer.html_render.render_html() - analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') - - return result - - -class AnalyzerController: - - def __init__(self, **kwargs): - self.temp_input_path = None - self.kwargs = kwargs - - def create_analyzer(self, dimension, mode: str, is_inference=False): - clss = AnalysisScope.get_analyzer(dimension, mode, is_inference) - if clss and callable(clss): - return clss(**self.kwargs) - return None - - -class DatasetController: - - def __init__(self, **kwargs): - self.collection_path = kwargs.get("collection_path") - self.kwargs = kwargs - - def create_dataset(self, mode): - dataset_cls_list = AnalyzerToDataset.get_dataset(mode) - - datasets = [] - for dataset_cls in dataset_cls_list: - if dataset_cls and callable(dataset_cls): - datasets.append(dataset_cls(self.collection_path, **self.kwargs)) - - return datasets + result_list = [] + analysiser_list = self.get_analyzer(dimension, kwargs.get("is_inference", False)) + for clss in analysiser_list: + if clss and callable(clss): + analysiser = clss(collection_path = self.collection_path, render_html=render_html, **kwargs) + result_list.append(analysiser.optimize()) + if render_html: + if hasattr(analysiser, "html_render"): + analysiser.html_render.render_html() + analysiser.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + return result_list if __name__ == "__main__": diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 34215d12d1..2efecffcb7 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -9,38 +9,21 @@ 
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), "cluste from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup from profiler.advisor.common import constant from profiler.advisor.utils.utils import debug_option -from profiler.advisor.common.module_lib import AnalysisScope from profiler.advisor.interface.interface import Interface logger = logging.getLogger() def _analyze(dimensions, **kwargs): - is_inference = kwargs.get("is_inference", False) - user_input_mode = kwargs.get("mode") result_list = [] job_list = [] for dimension in dimensions: - - valid_modes = AnalysisScope.analyzer_list(dimension, is_inference) - if not valid_modes: - logger.info("Skip analysis of dimension %s, no analyzer", dimension) - continue - if user_input_mode and user_input_mode not in valid_modes: - logger.error("Got error mode %s for analysis dimension %s, optionals are %s", user_input_mode, dimension, - valid_modes) - continue - - analysis_modes = [user_input_mode] if user_input_mode else valid_modes - - for mode in analysis_modes: interface = Interface(**kwargs) - job_list.append((dimension, mode, interface)) + job_list.append((dimension, interface)) - for i, (dimension, mode, interface) in enumerate(job_list[::-1]): - result_list.append( - interface.get_result(dimension, mode, render_html=i == len(job_list) - 1, is_inference=is_inference)) + for i, (dimension, interface) in enumerate(job_list[::-1]): + result_list += interface.get_result(dimension, render_html=i == len(job_list) - 1) for result in result_list[::-1]: if result and hasattr(result, "show"): @@ -76,7 +59,7 @@ def analyze_all(**kwargs) -> None: if not kwargs.get("benchmark_profiling_path"): kwargs["benchmark_profiling_path"] = kwargs.get("profiling_path") - _analyze(AnalysisScope.supported_dims, **kwargs) + _analyze(Interface.all_dimension, **kwargs) @analyze_cli.command(context_settings=CONTEXT_SETTINGS, diff --git a/profiler/cluster_analyse/cluster_analysis.py 
b/profiler/cluster_analyse/cluster_analysis.py index 2445462211..fd127fdc03 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -25,7 +25,7 @@ from common_func.path_manager import PathManager from analysis.analysis_facade import AnalysisFacade -class Interface: +class ClusterAnalysis: ASCEND_PT = "ascend_pt" ASCEND_MS = "ascend_ms" @@ -88,4 +88,4 @@ if __name__ == "__main__": Constant.COLLECTION_PATH: args_parsed.collection_path, Constant.ANALYSIS_MODE: args_parsed.mode } - Interface(parameter).run() + ClusterAnalysis(parameter).run() -- Gitee From d251c21d769d7db97b71d8d4026b311aa087f799 Mon Sep 17 00:00:00 2001 From: zhaolei Date: Thu, 25 Apr 2024 09:58:17 +0800 Subject: [PATCH 06/21] =?UTF-8?q?att=20advisor=20html=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../html/templates/cluster_analysis.html | 49 +++++++++++++++++++ .../html/templates/compute_analysis.html | 29 +++++++++++ .../advisor/display/html/templates/main.html | 3 +- .../html/templates/timeline_analysis.html | 34 +++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 profiler/advisor/display/html/templates/cluster_analysis.html create mode 100644 profiler/advisor/display/html/templates/compute_analysis.html create mode 100644 profiler/advisor/display/html/templates/timeline_analysis.html diff --git a/profiler/advisor/display/html/templates/cluster_analysis.html b/profiler/advisor/display/html/templates/cluster_analysis.html new file mode 100644 index 0000000000..32379d56fc --- /dev/null +++ b/profiler/advisor/display/html/templates/cluster_analysis.html @@ -0,0 +1,49 @@ +
+

{{title|safe}}

+
+
+ + {% if result.get("Description") %} +
Description
+ + {% endif %} + + {% if result.get("Suggestion") %} +
Suggestion
+ + {% endif %} + + {% if result.get("details") %} +
details
+
+ {% for item in result.get("details") %} + + + {% for header in item.get("headers") %} + + {% endfor %} + + {% for row in item.get("data") %} + + {% for element in row %} + {% if element is number %} + + {% else %} + + {% endif %} + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|round(2) }}{{ element }}
+ {% endfor %} +
+ {% endif %} + +
+ +
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/compute_analysis.html b/profiler/advisor/display/html/templates/compute_analysis.html new file mode 100644 index 0000000000..e1907c091b --- /dev/null +++ b/profiler/advisor/display/html/templates/compute_analysis.html @@ -0,0 +1,29 @@ +
+

Abnormal Performance Operator

+
+ {{table.get("title")}} + + + + {% for header in table.get("headers") %} + + {% endfor %} + + {% for row in table.get("rows") %} + + {% for element in row %} + {% if element is number %} + + {% else %} + + {% endif %} + {% endfor %} + + {% endfor %} +
{{ header }}
{{ element|round(2) }}{{ element }}
+ {% if call_stack %} + call stack:
+ {{call_stack}} + {% endif %} +
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/main.html b/profiler/advisor/display/html/templates/main.html index 1a9392d2b2..f1703c7d8c 100644 --- a/profiler/advisor/display/html/templates/main.html +++ b/profiler/advisor/display/html/templates/main.html @@ -72,7 +72,7 @@ table { width: 100%; - table-layout: fixed; + table-layout: auto; border-collapse: collapse; margin-top: 2px; margin-bottom: 5px; @@ -82,6 +82,7 @@ padding: 10px; word-wrap: break-word; word-break: break-all; + white-space: nowrap; border: 1px solid rgb(170, 169, 169); text-align: left; } diff --git a/profiler/advisor/display/html/templates/timeline_analysis.html b/profiler/advisor/display/html/templates/timeline_analysis.html new file mode 100644 index 0000000000..b5ea891242 --- /dev/null +++ b/profiler/advisor/display/html/templates/timeline_analysis.html @@ -0,0 +1,34 @@ +
+

{{title|safe}}

+
+
+
+ {% if result.get("img") %} +
+ Image +
+ {% endif %} + + {% if result.get("current") %} + + {% endif %} + + {% if result.get("bottlenect") %} + + {% endif %} + + {% if result.get("advice") %} + + {% endif %} + +
+
+
+
-- Gitee From e2e693e2fb2fd4642bd3bef59f5e91584c2033af Mon Sep 17 00:00:00 2001 From: fanxiaotong Date: Tue, 16 Apr 2024 20:37:29 +0800 Subject: [PATCH 07/21] framework --- profiler/advisor/analyzer/base_analyzer.py | 18 +++ .../analyzer/cluster/slow_link_analyser.py | 104 ++++++++++++ .../analyzer/cluster/slow_rank_analyser.py | 101 ++++++++++++ .../fusion_ops/fusion_ops_analyzer.py | 20 +-- profiler/advisor/common/constant.py | 8 + profiler/advisor/common/module_lib.py | 87 ---------- .../dataset/cluster/cluster_dataset.py | 152 ++++++++++++++++++ .../cluster/cluster_step_trace_time_bean.py | 67 ++++++++ profiler/advisor/interface/interface.py | 84 ++++------ profiler/cli/analyze_cli.py | 25 +-- profiler/cluster_analyse/cluster_analysis.py | 4 +- 11 files changed, 496 insertions(+), 174 deletions(-) create mode 100644 profiler/advisor/analyzer/cluster/slow_link_analyser.py create mode 100644 profiler/advisor/analyzer/cluster/slow_rank_analyser.py delete mode 100644 profiler/advisor/common/module_lib.py create mode 100644 profiler/advisor/dataset/cluster/cluster_dataset.py create mode 100644 profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index f698865266..ff945da5cf 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -1,7 +1,17 @@ from abc import abstractmethod, ABCMeta +from profiler.advisor.display.html.render import HTMLRender +dataset_cls_list = [] class BaseAnalyzer(metaclass=ABCMeta): + def __init__(self, collection_path, dataset_cls_list, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + self.n_processes = n_processes + self.cann_version = cann_version + self.torch_version = torch_version + self.html_render = HTMLRender() + self.collection_path = collection_path + self.kwargs = kwargs + self.event_dataset_list = self.get_dataset_dict(dataset_cls_list) 
@abstractmethod def optimize(self): @@ -14,3 +24,11 @@ class BaseAnalyzer(metaclass=ABCMeta): @abstractmethod def make_render(self): pass + + def get_dataset_dict(self, dataset_cls_list): + datasets = {key: [] for key in dataset_cls_list} + + for dataset_cls in dataset_cls_list: + if dataset_cls and callable(dataset_cls): + datasets[dataset_cls] = dataset_cls(self.collection_path, **self.kwargs) + return datasets diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py new file mode 100644 index 0000000000..d4212ada2c --- /dev/null +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -0,0 +1,104 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections import defaultdict +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataSet + + +class SlowLinkAnalyzer(BaseAnalyzer): + RDMA_TIME_MS = "RDMA time(ms)" + RDMA_SIZE_MB = "RDMA size(mb)" + SDMA_TIME_MS = "SDMA time(ms)" + SDMA_SIZE_MB = "SDMA size(mb)" + RDMA_BANDWIDTH = "RDMA bandwidth(GB/s)" + SDMA_BANDWIDTH = "SDMA bandwidth(GB/s)" + COMMUNICATION_BANDWIDTH_INFO = "Communication Bandwidth Info" + TRANSIT_TIME = "Transit Time(ms)" + TRANSIT_SIZE = "Transit Size(MB)" + SDMA = "SDMA" + RDMA = "RDMA" + SLOW_LINK_ANALYSIS = "slow_link_analysis" + dataset_cls_list = [ClusterCommunicationDataSet] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) + self.communication_data_class = self.event_dataset_list[ClusterCommunicationDataSet] + self.rank_bw_dict = self.communication_data_class.get_data() + self.result = OptimizeResult() + self.bottelneck = '' + self.suggestion = '' + + def optimize(self, **kwargs): + self.process() + self.make_record() + self.make_render() + return self.result + + def process(self): + if self.rank_bw_dict: + self.produce_bottleneck(self.RDMA_BANDWIDTH) + self.produce_bottleneck(self.SDMA_BANDWIDTH) + + def produce_bottleneck(self, link_type: str): + data_list = [rank_dict.get(link_type, 0) for rank_id, rank_dict in self.rank_bw_dict.items()] + avg_bw = round(sum(data_list) / len(data_list), 3) + if avg_bw == 0: + return + self.bottelneck += f'{link_type}: \n' \ + f'The average is {avg_bw}, ' \ + f'while the maximum is {round(max(data_list), 3)}GB/s and ' \ + f'the minimum is {round(min(data_list), 3)}GB/s. 
' \ + f'the difference is {round(max(data_list) - min(data_list), 3)}GB/s. \n' + + def format_details(self): + details_dict = {} + headers = ['rank_id'] + list(self.rank_bw_dict[0].keys()) + data_list = [] + for rank_id, rank_bw in self.rank_bw_dict.items(): + data_list.append([rank_id] + list(rank_bw.keys())) + + details_dict["headers"] = headers + details_dict["data"] = data_list + + return [details_dict] + + def make_record(self): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem( + SlowLinkAnalyzer.SLOW_LINK_ANALYSIS, + self.bottelneck, + [""] + ) + self.result.add(OptimizeRecord(optimization_item)) + + def make_render(self): + result_for_html = { + "Description" : self.bottelneck, + "suggestion" : self.suggestion, + "details" : self.format_details() + } + + self.html_render.render_template(key="cluster", + title=SlowLinkAnalyzer.SLOW_LINK_ANALYSIS, + template_dir="templates", + template_name="cluster_analysis.html", + cann_version=self.cann_version, + torch_version=self.torch_version, + result=result_for_html) \ No newline at end of file diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py new file mode 100644 index 0000000000..35b4663d38 --- /dev/null +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -0,0 +1,101 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import defaultdict +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet + + +class SlowRankAnalyzer(BaseAnalyzer): + SLOW_RANK_ANALYSIS = "slow_rank_analysis" + RANK = "rank" + RATIO_THRESHOLD = 0.05 + BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] + dataset_cls_list = [ClusterStepTraceTimeDataSet] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) + self.step_trace_class = self.event_dataset_list[ClusterStepTraceTimeDataSet] + self.step_trace_dict = self.step_trace_class.get_data() + self.result = OptimizeResult() + self.bottelneck = '' + self.suggestion = '' + + def optimize(self, **kwargs): + self.process() + + self.make_record() + self.make_render() + return self.result + + def process(self): + total_time_list = [sum(data_tuple) for rank_id, data_tuple in self.step_trace_dict.items()] + if total_time_list: + mean_total_time = sum(total_time_list) / len(total_time_list) + for i in range(len(self.BOTTLENECK_LIST)): + self.produce_bottleneck(self.step_trace_dict, i, mean_total_time) + + def produce_bottleneck(self, step_dict: dict, produce_type: int, mean_total_time: float): + data_list = [data_tuple[produce_type] for rank_id, data_tuple in step_dict.items()] + max_ratio = self.compute_max_gap_ratio(data_list, mean_total_time) + if max_ratio > self.RATIO_THRESHOLD: + self.bottelneck += f'{self.BOTTLENECK_LIST[produce_type]} has some issues in the cluster, ' \ + f'because the max difference of {self.BOTTLENECK_LIST[produce_type]} time 
' \ + f'has reached {round(max_ratio * mean_total_time / 1000, 3)}ms. \n' + + def make_record(self): + """ + make record for what and how to optimize + """ + optimization_item = OptimizeItem( + SlowRankAnalyzer.SLOW_RANK_ANALYSIS, + self.bottelneck, + [""] + ) + self.result.add(OptimizeRecord(optimization_item)) + + def format_details(self): + details_dict = {} + headers = ["rank_id", "comupte", "communication", "free"] + data_list = [] + for key,value in self.step_trace_dict.items(): + data_list.append([key] + value) + details_dict["headers"] = headers + details_dict["data"] = data_list + return [details_dict] + + def make_render(self): + result_for_html = { + "Description" : self.bottelneck, + "suggestion" : self.suggestion, + "details" : self.format_details() + } + + self.html_render.render_template(key="cluster", + title=SlowRankAnalyzer.SLOW_RANK_ANALYSIS, + template_dir="templates", + template_name="cluster_analysis.html", + cann_version=self.cann_version, + torch_version=self.torch_version, + result=result_for_html) + + @staticmethod + def compute_max_gap_ratio(data: list, mean: float): + if mean == 0: + return 0 + else: + return (max(data) - min(data)) / mean diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py index 3db3ae55a1..ca10dcb1f5 100644 --- a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py +++ b/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py @@ -11,38 +11,32 @@ from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.utils.utils import format_timeline_result -from profiler.advisor.display.html.render import HTMLRender from profiler.advisor.utils.utils import init_timeline_ops_db logger = logging.getLogger() class 
TimelineFusionOpsAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] - def __init__(self, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - self.n_processes = n_processes + def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() self.matched_op_stacks = {} - self.cann_version = cann_version - self.torch_version = torch_version self.empty_stacks = True - self.event_dataset = None - self.html_render = HTMLRender() self.result = OptimizeResult() + self.timeline_event_dataset = self.event_dataset_list[TimelineEventDataset] - def optimize(self, timeline_dataset, **kwargs): - - self.event_dataset = timeline_dataset - + def optimize(self, **kwargs): for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]: for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version), f"_{mode}_op_api_map").items(), leave=False, ncols=100, desc="Scanning timeline for affinity apis"): for npu_api in npu_apis.split("/"): - self.find_fusion_ops(self.event_dataset, op_combined, npu_api, mode) + self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode) - self.query_stack(self.event_dataset) + self.query_stack(self.timeline_event_dataset) logger.info("Finish timeline analysis") self.make_record() diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 9703e78c00..df12fd76d3 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -104,3 +104,11 @@ DEFAULT_RULE_PATH = "./rules/" TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID = -1 DEFAULT_TEMPLATE_HEADER = "Performance Optimization Suggestions" + +PT_PROF_SUFFIX = "ascend_pt" +ASCEND_PROFILER_OUTPUT = "ASCEND_PROFILER_OUTPUT" 
+COLLECTION_PATH = "collection_path" +CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" +KERNEL_DETAILS_CSV = "kernel_details.csv" +CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" +CLUSTER_COMM_JSON = "cluster_communication.json" diff --git a/profiler/advisor/common/module_lib.py b/profiler/advisor/common/module_lib.py deleted file mode 100644 index 697e37f736..0000000000 --- a/profiler/advisor/common/module_lib.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging - -from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer -from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer - -from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset - -logger = logging.getLogger() - - -class AnalysisScope: - supported_dims = ["computing", "scheduling", "communication", "overall", "dataloader"] - - @staticmethod - def get_analyzer(dimension, analyzer_name, is_inference=False): - if is_inference: - return getattr(InferenceAnalysisScope, dimension)().get(analyzer_name) - return getattr(TrainAnalysisScope, dimension)().get(analyzer_name) - - @staticmethod - def analyzer_list(dim=None, is_inference=False): - analyzer_list = [] - dims = [dim] if dim else AnalysisScope.supported_dims - for dim in dims: - analyzer_list += list(getattr(InferenceAnalysisScope, dim)().keys()) if is_inference else list( - getattr(TrainAnalysisScope, dim)().keys()) - return analyzer_list - - -class TrainAnalysisScope(AnalysisScope): - - @staticmethod - def computing(): - return dict() - - @staticmethod - def scheduling(): - return dict( - timeline_fusion_ops=TimelineFusionOpsAnalyzer - ) - - @staticmethod - def communication(): - return dict() - - @staticmethod - def overall(): - return dict( - overall_summary=OverallSummaryAnalyzer - ) - - @staticmethod - def dataloader(): - return dict() - - -class InferenceAnalysisScope(AnalysisScope): - @staticmethod - def computing(): - return dict() - - 
@staticmethod - def scheduling(): - return dict() - - @staticmethod - def communication(): - return dict() - - @staticmethod - def overall(): - return dict() - - @staticmethod - def dataloader(): - return dict() - - -class AnalyzerToDataset: - analyzer_to_dataset = { - "overall_summary": [], - "timeline_fusion_ops": [TimelineEventDataset] - } - - @staticmethod - def get_dataset(analyzer_name): - return AnalyzerToDataset.analyzer_to_dataset.get(analyzer_name) diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py new file mode 100644 index 0000000000..ee8b3563b7 --- /dev/null +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -0,0 +1,152 @@ +import logging + +import os + +from profiler.advisor.utils.utils import singleton +from profiler.cluster_analyse.common_func.file_manager import FileManager +from profiler.advisor.common import constant as const +from profiler.cluster_analyse.common_func.constant import Constant +from collections import defaultdict +from profiler.cluster_analyse.cluster_analysis import ClusterAnalysis +from profiler.advisor.dataset.cluster.cluster_step_trace_time_bean import ClusterStepTraceTimeBean + +logger = logging.getLogger() + + +class ClusterDataset: + + def __init__(self, collection_path, **kwargs) -> None: + self.collection_path = os.path.realpath(collection_path) + if not self.is_cluster_analysis_output_exist(): + self.cluster_analyze() + + def is_cluster_analysis_output_exist(self): + """ + check whether input path is valid + """ + for file in os.listdir(self.collection_path): + if file == 'cluster_analysis_output': + print("[INFO]Cluster has been analyzed " + "because of the existence of cluster analysis output directory.") + print("[INFO]Skip Cluster analyze backend.") + return True + return False + + def cluster_analyze(self): + parameter = { + Constant.COLLECTION_PATH: self.collection_path, + Constant.ANALYSIS_MODE: "all" + } + print("[INFO] cluster analysis 
is in the process, please wait...") + try: + ClusterAnalysis(parameter).run() + except Exception as e: + raise ValueError(f"Cluster analyze backend failed:{e}") from e + + def load_csv_data(self, file_name, dataBean): + csv_path = os.path.join(self.collection_path, const.CLUSTER_ANALYSIS_OUTPUT, file_name) + if not os.path.exists(csv_path): + msg = "[ERROR] cluster_step_trace_time.csv doesn't exist, terminate analysis." + raise RuntimeError(msg) + data = FileManager.read_csv_file(csv_path, dataBean) + return data + + def load_json_data(self, file_name): + json_path = os.path.join(self.collection_path, const.CLUSTER_ANALYSIS_OUTPUT, file_name) + if not os.path.exists(json_path): + msg = "[ERROR] cluster_communication.json doesn't exist, terminate analysis." + raise RuntimeError(msg) + data = FileManager.read_json_file(json_path) + return data + + +@singleton +class ClusterStepTraceTimeDataSet(ClusterDataset): + RANK = "rank" + + def __init__(self, collection_path: str, kwargs: dict = None): + super().__init__(collection_path) + self._step_dict = defaultdict() + self.parse() + + def parse(self): + step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) + self._step_dict = self.formate_data(step_data) + + def formate_data(self, step_data: list): + step_dict = defaultdict(lambda: [0, 0, 0]) + for step_bean in step_data: + if step_bean.type == self.RANK: + step_dict[step_bean.index][0] += step_bean.compute + step_dict[step_bean.index][1] += step_bean.communication + step_dict[step_bean.index][2] += step_bean.free + return step_dict + + def get_data(self): + return self._step_dict + + +@singleton +class ClusterCommunicationDataSet(ClusterDataset): + RDMA_TIME_MS = "RDMA time(ms)" + RDMA_SIZE_MB = "RDMA size(mb)" + SDMA_TIME_MS = "SDMA time(ms)" + SDMA_SIZE_MB = "SDMA size(mb)" + RDMA_BANDWIDTH = "RDMA bandwidth(GB/s)" + SDMA_BANDWIDTH = "SDMA bandwidth(GB/s)" + COMMUNICATION_BANDWIDTH_INFO = "Communication Bandwidth Info" + TRANSIT_TIME = 
"Transit Time(ms)" + TRANSIT_SIZE = "Transit Size(MB)" + SDMA = "SDMA" + RDMA = "RDMA" + + def __init__(self, collection_path: str, kwargs: dict = None): + super().__init__(collection_path) + self.rank_bw_dict = defaultdict(lambda: { + self.RDMA_TIME_MS: 0, + self.RDMA_SIZE_MB: 0, + self.SDMA_TIME_MS: 0, + self.SDMA_SIZE_MB: 0, + }) + self.parse() + + @staticmethod + def compute_ratio(dividend: float, divisor: float): + if abs(divisor) < 1e-15: + return 0 + else: + return round(dividend / divisor, 4) + + def parse(self): + communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) + self.process(communication_json) + + def process(self, communication_json: dict): + for comm_group, group_dict in communication_json.items(): + for step, step_dict in group_dict.items(): + for op, op_dict in step_dict.items(): + self.compute_bandwidth(op_dict) + + def compute_bandwidth(self, op_dict: dict): + for rank_id, rank_dict in op_dict.items(): + try: + rank = int(rank_id) + except ValueError as e: + msg = "[ERROR] Cluster_communication.json has invalid structure." 
+ raise ValueError(msg) from e + for comm_type, bw_dict in rank_dict.get(self.COMMUNICATION_BANDWIDTH_INFO, {}).items(): + if comm_type == self.SDMA: + self.rank_bw_dict[rank][self.SDMA_SIZE_MB] += bw_dict.get(self.TRANSIT_SIZE) + self.rank_bw_dict[rank][self.SDMA_TIME_MS] += bw_dict.get(self.TRANSIT_TIME) + if comm_type == self.RDMA: + self.rank_bw_dict[rank][self.RDMA_SIZE_MB] += bw_dict.get(self.TRANSIT_SIZE) + self.rank_bw_dict[rank][self.RDMA_TIME_MS] += bw_dict.get(self.TRANSIT_TIME) + + for rank, rank_dict in self.rank_bw_dict.items(): + self.rank_bw_dict[rank][self.RDMA_BANDWIDTH] = self.compute_ratio( + self.rank_bw_dict[rank][self.RDMA_SIZE_MB], self.rank_bw_dict[rank][self.RDMA_TIME_MS]) + self.rank_bw_dict[rank][self.SDMA_BANDWIDTH] = self.compute_ratio( + self.rank_bw_dict[rank][self.SDMA_SIZE_MB], self.rank_bw_dict[rank][self.SDMA_TIME_MS]) + + def get_data(self): + return self.rank_bw_dict diff --git a/profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py b/profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py new file mode 100644 index 0000000000..b108fc77a3 --- /dev/null +++ b/profiler/advisor/dataset/cluster/cluster_step_trace_time_bean.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class ClusterStepTraceTimeBean: + STEP = "Step" + TYPE = "Type" + INDEX = "Index" + COMPUTING = "Computing" + COMMUNICATION = "Communication(Not Overlapped)" + FREE = "Free" + + def __init__(self, data: dict): + self._data = data + + @property + def step(self) -> str: + return self._data.get(self.STEP, '') + + @property + def type(self) -> str: + return self._data.get(self.TYPE, '') + + @property + def index(self) -> int: + try: + return int(self._data.get(self.INDEX)) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Index'." + raise ValueError(msg) from e + + @property + def compute(self) -> float: + try: + return float(self._data.get(self.COMPUTING, '')) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Computing'." + raise ValueError(msg) from e + + @property + def communication(self) -> float: + try: + return float(self._data.get(self.COMMUNICATION, '')) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Communication'." + raise ValueError(msg) from e + + @property + def free(self) -> float: + try: + return float(self._data.get(self.FREE, '')) + except ValueError as e: + msg = "[ERROR] Cluster step trace time.csv has invalid value in column 'Free'." 
+ raise ValueError(msg) from e + diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 4a2eae244a..19da350a02 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,66 +1,48 @@ import os -from profiler.advisor.common.module_lib import AnalysisScope, AnalyzerToDataset +from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer +from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset from profiler.advisor.utils.utils import Timer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer +from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer class Interface: + supported_analysiser = { + "computing": [], + "scheduling": [TimelineFusionOpsAnalyzer], + "communication": [], + "overall": [], + "dataloader": [], + "cluster": [SlowRankAnalyzer, SlowLinkAnalyzer] + } + + all_dimension = supported_analysiser.keys() + def __init__(self, **kwargs): self.collection_path = os.path.realpath(kwargs.get("profiling_path")) - self._analyzer_controller = AnalyzerController(**kwargs) - self._dataset_controller = DatasetController(collection_path=self.collection_path, **kwargs) - def get_result(self: any, dimension: str, mode: str=None, render_html=False, **kwargs): + @staticmethod + def get_analyzer(dimension, is_inference=False): + return Interface.supported_analysiser.get(dimension, []) + + def get_result(self: any, dimension: str, render_html=False, **kwargs): """ :Param mode: affinity apis, ai cpu and so on. 
""" - analyzer = self._analyzer_controller.create_analyzer(dimension, mode, kwargs.get("is_inference", False)) - - datasets = self._dataset_controller.create_dataset(mode) - if not analyzer: - return - - if datasets: - result = analyzer.optimize(*datasets) - else: - result = analyzer.optimize() - - if render_html: - if hasattr(analyzer, "html_render"): - analyzer.html_render.render_html() - analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') - - return result - - -class AnalyzerController: - - def __init__(self, **kwargs): - self.temp_input_path = None - self.kwargs = kwargs - - def create_analyzer(self, dimension, mode: str, is_inference=False): - clss = AnalysisScope.get_analyzer(dimension, mode, is_inference) - if clss and callable(clss): - return clss(**self.kwargs) - return None - - -class DatasetController: - - def __init__(self, **kwargs): - self.collection_path = kwargs.get("collection_path") - self.kwargs = kwargs - - def create_dataset(self, mode): - dataset_cls_list = AnalyzerToDataset.get_dataset(mode) - - datasets = [] - for dataset_cls in dataset_cls_list: - if dataset_cls and callable(dataset_cls): - datasets.append(dataset_cls(self.collection_path, **self.kwargs)) - - return datasets + result_list = [] + analysiser_list = self.get_analyzer(dimension, kwargs.get("is_inference", False)) + for idx, clss in enumerate(analysiser_list): + if clss and callable(clss): + analysiser = clss(collection_path = self.collection_path, **kwargs) + result_list.append(analysiser.optimize()) + if render_html and idx == len(analysiser_list) - 1: + if hasattr(analysiser, "html_render"): + analysiser.html_render.render_html() + analysiser.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + return result_list if __name__ == "__main__": diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 34215d12d1..2efecffcb7 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -9,38 +9,21 @@ 
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), "cluste from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup from profiler.advisor.common import constant from profiler.advisor.utils.utils import debug_option -from profiler.advisor.common.module_lib import AnalysisScope from profiler.advisor.interface.interface import Interface logger = logging.getLogger() def _analyze(dimensions, **kwargs): - is_inference = kwargs.get("is_inference", False) - user_input_mode = kwargs.get("mode") result_list = [] job_list = [] for dimension in dimensions: - - valid_modes = AnalysisScope.analyzer_list(dimension, is_inference) - if not valid_modes: - logger.info("Skip analysis of dimension %s, no analyzer", dimension) - continue - if user_input_mode and user_input_mode not in valid_modes: - logger.error("Got error mode %s for analysis dimension %s, optionals are %s", user_input_mode, dimension, - valid_modes) - continue - - analysis_modes = [user_input_mode] if user_input_mode else valid_modes - - for mode in analysis_modes: interface = Interface(**kwargs) - job_list.append((dimension, mode, interface)) + job_list.append((dimension, interface)) - for i, (dimension, mode, interface) in enumerate(job_list[::-1]): - result_list.append( - interface.get_result(dimension, mode, render_html=i == len(job_list) - 1, is_inference=is_inference)) + for i, (dimension, interface) in enumerate(job_list[::-1]): + result_list += interface.get_result(dimension, render_html=i == len(job_list) - 1) for result in result_list[::-1]: if result and hasattr(result, "show"): @@ -76,7 +59,7 @@ def analyze_all(**kwargs) -> None: if not kwargs.get("benchmark_profiling_path"): kwargs["benchmark_profiling_path"] = kwargs.get("profiling_path") - _analyze(AnalysisScope.supported_dims, **kwargs) + _analyze(Interface.all_dimension, **kwargs) @analyze_cli.command(context_settings=CONTEXT_SETTINGS, diff --git a/profiler/cluster_analyse/cluster_analysis.py 
b/profiler/cluster_analyse/cluster_analysis.py index 2445462211..fd127fdc03 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ b/profiler/cluster_analyse/cluster_analysis.py @@ -25,7 +25,7 @@ from common_func.path_manager import PathManager from analysis.analysis_facade import AnalysisFacade -class Interface: +class ClusterAnalysis: ASCEND_PT = "ascend_pt" ASCEND_MS = "ascend_ms" @@ -88,4 +88,4 @@ if __name__ == "__main__": Constant.COLLECTION_PATH: args_parsed.collection_path, Constant.ANALYSIS_MODE: args_parsed.mode } - Interface(parameter).run() + ClusterAnalysis(parameter).run() -- Gitee From 67193fae1648f85581cd027f5103da5c467d3c98 Mon Sep 17 00:00:00 2001 From: PersonalC Date: Wed, 8 May 2024 19:35:20 +0800 Subject: [PATCH 08/21] =?UTF-8?q?att=20advisor=E6=96=B0=E5=A2=9Edataset?= =?UTF-8?q?=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/advisor/analyzer/base_analyzer.py | 73 +- .../analyzer/cluster/slow_link_analyser.py | 10 +- .../analyzer/cluster/slow_rank_analyser.py | 10 +- .../{computing => computation}/__init__.py | 0 .../aicpu/__init__.py | 0 .../bound/__init__.py | 0 .../op_compile/__init__.py | 0 .../analyzer/overall/overall_analyzer.py | 1 + .../{scheduling => schedule}/__init__.py | 0 .../free_event/__init__.py | 0 .../fusion_ops/__init__.py | 0 .../fusion_ops/fusion_ops_analyzer.py | 542 ++++----- profiler/advisor/common/constant.py | 12 +- .../advisor/common/timeline/fusion_ops_db.py | 340 +----- .../common/timeline/fusion_ops_rule.py | 110 ++ .../timeline/fusion_ops_rule_handler.py | 193 +++ profiler/advisor/config/config.ini | 8 +- .../dataset/cluster/cluster_dataset.py | 22 +- profiler/advisor/dataset/dataset.py | 38 + .../advisor/dataset/timeline_event_dataset.py | 26 +- profiler/advisor/interface/interface.py | 33 +- profiler/advisor/utils/utils.py | 1048 +++++++++-------- profiler/cli/analyze_cli.py | 72 +- profiler/cli/entrance.py | 2 +- 
profiler/cli/update_cli.py | 40 + requirements/{test.txt => tests.txt} | 8 +- version.txt | 2 +- 27 files changed, 1413 insertions(+), 1177 deletions(-) rename profiler/advisor/analyzer/{computing => computation}/__init__.py (100%) rename profiler/advisor/analyzer/{computing => computation}/aicpu/__init__.py (100%) rename profiler/advisor/analyzer/{computing => computation}/bound/__init__.py (100%) rename profiler/advisor/analyzer/{computing => computation}/op_compile/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/free_event/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/fusion_ops/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/fusion_ops/fusion_ops_analyzer.py (91%) create mode 100644 profiler/advisor/common/timeline/fusion_ops_rule.py create mode 100644 profiler/advisor/common/timeline/fusion_ops_rule_handler.py create mode 100644 profiler/advisor/dataset/dataset.py create mode 100644 profiler/cli/update_cli.py rename requirements/{test.txt => tests.txt} (95%) diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index ff945da5cf..6f9438ba75 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -1,17 +1,60 @@ +import logging +from functools import wraps +from typing import Dict, List, Union from abc import abstractmethod, ABCMeta + +from profiler.advisor.common import constant +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.dataset.dataset import Dataset +from profiler.advisor.result.result import OptimizeResult from profiler.advisor.display.html.render import HTMLRender -dataset_cls_list = [] +logger = logging.getLogger() + + +class BaseAnalyzer(VersionControl, metaclass=ABCMeta): + _SUPPORT_VERSIONS = constant.SUPPORTED_CANN_VERSION -class 
BaseAnalyzer(metaclass=ABCMeta): - def __init__(self, collection_path, dataset_cls_list, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + dataset_cls_list = [] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): self.n_processes = n_processes self.cann_version = cann_version self.torch_version = torch_version self.html_render = HTMLRender() self.collection_path = collection_path self.kwargs = kwargs - self.event_dataset_list = self.get_dataset_dict(dataset_cls_list) + self.dataset_list: Dict[str, List[Dataset]] = {} + self.init_dataset_list() + self.result = OptimizeResult() + self.record_list: Dict[str, List] = {} + + @classmethod + def check_data(cls, data_list: tuple): + """ + check if all data in data list is contained + :param data_list: data list to check + :return: func ptr if check success + """ + + def decorate(func): + + @wraps(func) + def wrapper(self): + data = self.dataset_list + if data is None: + return None + for data_key in data_list: + if data_key not in data: + return None + + logger.info("Enable analysis %s with %s", self.__class__.__name__, ",".join(data_list)) + return func(self, data) + + return wrapper + + return decorate @abstractmethod def optimize(self): @@ -25,10 +68,24 @@ class BaseAnalyzer(metaclass=ABCMeta): def make_render(self): pass - def get_dataset_dict(self, dataset_cls_list): - datasets = {key: [] for key in dataset_cls_list} + def init_dataset_list(self)->None: + dataset_cls_list = self.dataset_cls_list + if len(dataset_cls_list) == 0: + logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) + return for dataset_cls in dataset_cls_list: if dataset_cls and callable(dataset_cls): - datasets[dataset_cls] = dataset_cls(self.collection_path, **self.kwargs) - return datasets + dataset_cls(collection_path=self.collection_path, data=self.dataset_list, **self.kwargs) + + 
@staticmethod + def get_first_data_by_key(data, key) -> Union[Dataset, None]: + """ + get the first member from data with key + :param data: input data + :param key: data key + :return: the first dataset in dataset list + """ + if key in data and len(data[key]) > 0: + return data[key][0] + return None diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py index d4212ada2c..7da50a8180 100644 --- a/profiler/advisor/analyzer/cluster/slow_link_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -14,7 +14,9 @@ # limitations under the License. from collections import defaultdict +from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataSet @@ -35,9 +37,11 @@ class SlowLinkAnalyzer(BaseAnalyzer): SLOW_LINK_ANALYSIS = "slow_link_analysis" dataset_cls_list = [ClusterCommunicationDataSet] - def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) - self.communication_data_class = self.event_dataset_list[ClusterCommunicationDataSet] + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + key = ClusterCommunicationDataSet.get_key() + self.communication_data_class = self.get_first_data_by_key(self.dataset_list, key) self.rank_bw_dict = self.communication_data_class.get_data() self.result = OptimizeResult() self.bottelneck = '' diff --git 
a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py index 35b4663d38..b49ef5ec8c 100644 --- a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -14,7 +14,9 @@ # limitations under the License. from collections import defaultdict +from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet @@ -27,9 +29,11 @@ class SlowRankAnalyzer(BaseAnalyzer): BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] dataset_cls_list = [ClusterStepTraceTimeDataSet] - def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) - self.step_trace_class = self.event_dataset_list[ClusterStepTraceTimeDataSet] + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + key = ClusterStepTraceTimeDataSet.get_key() + self.step_trace_class = self.get_first_data_by_key(self.dataset_list, key) self.step_trace_dict = self.step_trace_class.get_data() self.result = OptimizeResult() self.bottelneck = '' diff --git a/profiler/advisor/analyzer/computing/__init__.py b/profiler/advisor/analyzer/computation/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/__init__.py rename to profiler/advisor/analyzer/computation/__init__.py diff --git a/profiler/advisor/analyzer/computing/aicpu/__init__.py 
b/profiler/advisor/analyzer/computation/aicpu/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/aicpu/__init__.py rename to profiler/advisor/analyzer/computation/aicpu/__init__.py diff --git a/profiler/advisor/analyzer/computing/bound/__init__.py b/profiler/advisor/analyzer/computation/bound/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/bound/__init__.py rename to profiler/advisor/analyzer/computation/bound/__init__.py diff --git a/profiler/advisor/analyzer/computing/op_compile/__init__.py b/profiler/advisor/analyzer/computation/op_compile/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/op_compile/__init__.py rename to profiler/advisor/analyzer/computation/op_compile/__init__.py diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py index 93b227fb61..e31a5d4288 100644 --- a/profiler/advisor/analyzer/overall/overall_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_analyzer.py @@ -1,4 +1,5 @@ import logging +from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.display.html.render import HTMLRender diff --git a/profiler/advisor/analyzer/scheduling/__init__.py b/profiler/advisor/analyzer/schedule/__init__.py similarity index 100% rename from profiler/advisor/analyzer/scheduling/__init__.py rename to profiler/advisor/analyzer/schedule/__init__.py diff --git a/profiler/advisor/analyzer/scheduling/free_event/__init__.py b/profiler/advisor/analyzer/schedule/free_event/__init__.py similarity index 100% rename from profiler/advisor/analyzer/scheduling/free_event/__init__.py rename to profiler/advisor/analyzer/schedule/free_event/__init__.py diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py b/profiler/advisor/analyzer/schedule/fusion_ops/__init__.py similarity index 100% rename from 
profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py rename to profiler/advisor/analyzer/schedule/fusion_ops/__init__.py diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py similarity index 91% rename from profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py rename to profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py index ca10dcb1f5..4259db093b 100644 --- a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py +++ b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py @@ -1,271 +1,271 @@ -import multiprocessing -import logging -import re - -from tqdm import tqdm - -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant as const -from profiler.advisor.common.timeline.event import TimelineEvent -from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset -from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.utils.utils import format_timeline_result -from profiler.advisor.utils.utils import init_timeline_ops_db - -logger = logging.getLogger() - - -class TimelineFusionOpsAnalyzer(BaseAnalyzer): - dataset_cls_list = [TimelineEventDataset] - - def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) - self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() - self.matched_op_stacks = {} - self.empty_stacks = True - self.result = OptimizeResult() - self.timeline_event_dataset = self.event_dataset_list[TimelineEventDataset] - - def optimize(self, **kwargs): - for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]: - - for 
op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version), - f"_{mode}_op_api_map").items(), leave=False, ncols=100, - desc="Scanning timeline for affinity apis"): - for npu_api in npu_apis.split("/"): - self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode) - - self.query_stack(self.timeline_event_dataset) - - logger.info("Finish timeline analysis") - self.make_record() - self.make_render() - return self.result - - def find_fusion_ops(self, event_dataset: TimelineEventDataset, ops: str, npu_api: str, mode: str): - """ - :Param event_dataset: dataset of timeline event - :Param ops: operator combination with '-' as separator , e.g. permute-reshape - :Param npu_api: api of torch_npu, generally more efficient than torch api - :Param mode: aten or dequeue or optimizer - :Return: json of op_name and called times and detail stacks - """ - op_rule_pattern, enable_regex = self._format_rule_to_pattern(ops) - if not enable_regex: - self._match_ops(event_dataset, op_rule_pattern, npu_api, mode) - else: - try: - self._match_ops_with_regex(event_dataset, op_rule_pattern, npu_api, mode) - except Exception as e: - logger.warning("Failed to find fusion operators with regex %s, reason is %s", ops, e) - - def _match_ops(self, event_dataset: TimelineEventDataset, ops: str, npu_api: str, mode: str): - """ match operator based on fusion operators rule(without regex), - only strictly equals of op name list means matched - :Param event_dataset: dataset of timeline event - :Param ops: operator combination with '-' as separator , e.g. 
permute-reshape - :Param npu_api: api of torch_npu, generally more efficient than torch api - :Param mode: aten or dequeue or optimizer - """ - op_list = ops.split(const.OP_SEP) - - matched_op_index = set() - api_ops_matched = False - - for index, event in enumerate(getattr(event_dataset, mode)): - if self._replace_op_name_prefix(event.name, mode) != op_list[0]: - continue - tmp_dequeue_event_names = [self._replace_op_name_prefix(event.name, mode) for event in - getattr(event_dataset, mode)[index: index + len(op_list)]] - if tmp_dequeue_event_names != op_list: - continue - api_ops_matched = True - matched_op_index.add(event.dataset_index) - - if api_ops_matched: - self._matched_op_index[npu_api + f":{ops}"] = matched_op_index - - def _match_ops_with_regex(self, event_dataset: TimelineEventDataset, op_rule_pattern: str, npu_api: str, - mode: str): - """ match operator based on fusion operators rule(with regex), - using regex to support condition like 'a = torch.mul(xxx) if xxx else torch.add(xxx)' - :Param event_dataset: dataset of timeline event - :Param op_rule_pattern: fusion operators rule with regex definition , e.g. add-mul{0,10}, add-mul* - :Param npu_api: api of torch_npu, generally more efficient than torch api - :Param mode: aten or dequeue or optimizer - """ - matched_op_index = set() - total_op_name = "".join([f"{const.OP_SEP}{self._replace_op_name_prefix(event.name, mode)}{const.OP_SEP}" - for event in - getattr(event_dataset, mode)]) - - matched_pattern_index_tuple = [(x.start(0), x.end(0)) for x in re.finditer(op_rule_pattern, total_op_name)] - # convert list of index tuple to a whole list: [(3, 25), ...] -> [3, 25, ...] 
- total_ops_split_points = [num for sublist in matched_pattern_index_tuple for num in sublist] - - api_ops_matched = len(total_ops_split_points) != 0 - - op_index = [] - if 0 not in total_ops_split_points: - total_ops_split_points = [0] + total_ops_split_points - if len(list(total_op_name)) not in total_ops_split_points: - total_ops_split_points.append(len(list(total_op_name))) - - # convert total ops name like "-add-mul-xxx-div-" to small pieces like [["add", "mul"], [...], ["div"]] - # by the regex index and then calculate the real index for matched fusion operators in event dataset - for l, r in zip(total_ops_split_points, total_ops_split_points[1:]): - matched_op_flag = True if (l, r) in matched_pattern_index_tuple else False - matched_ops_list = total_op_name[l: r].strip(const.OP_SEP).split(const.OP_SEP + const.OP_SEP) - op_index.append([matched_op_flag, len(matched_ops_list)]) - for i, _ in enumerate(op_index): - if i > 0: - # calculate cumsum for indexing matched operator - op_index[i][1] = op_index[i][1] + op_index[i - 1][1] - op_index = [[False, 0]] + op_index - - for i, _ in enumerate(op_index): - if not op_index[i][0]: - continue - index = op_index[i - 1][1] - matched_op_index.add(index) - - if index > len(getattr(event_dataset, mode)) - 1: - continue - dataset_index = getattr(event_dataset, mode)[index].get("dataset_index") - matched_op_index.add(dataset_index) - - if api_ops_matched: - self._matched_op_index[npu_api + f":{op_rule_pattern}"] = sorted(list(matched_op_index)) - - def make_record(self): - """ - make record for what and how to optimize - """ - if not self.matched_op_stacks: - return - - desc = f"Found {len(format_timeline_result(self.matched_op_stacks))} apis to be replaced" \ - f" based on the runtime env cann-{self.cann_version} and torch-{self.torch_version}" - suggestion = "Please replace training api according to sub table 'Affinity training api'" - if self.empty_stacks: - desc += ", but with no stack" - suggestion = 
const.TIMELINE_EMPTY_STACKS_PROMPT.format( - timeline_profiling_doc_url=const.TIMELINE_WITH_STACK_DOC_URL - ) - - optimization_item = OptimizeItem( - const.AFFINITY_TRAINING_API, - desc, - [suggestion] - ) - - self.result.add(OptimizeRecord(optimization_item)) - - record_title = ["Affinity API", "Code stacks", "Stack called counts"] - self.result.add_detail(const.AFFINITY_TRAINING_API, headers=record_title) - - for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items(): - if not stacks_info: - detail = [api_name, "null", "null"] - self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) - else: - for stack in stacks_info: - detail = [api_name, *stack] - self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) - - def make_render(self): - format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True) - - self.html_render.render_template(key="scheduling", - template_dir="templates", - template_name="affinity_api.html", - cann_version=self.cann_version, - torch_version=self.torch_version, - empty_stacks=self.empty_stacks, - with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, - api_doc_url=const.TIMELINE_API_DOC_URL, - result=format_result_for_html) - - def query_stack(self, event_dataset: TimelineEventDataset): - if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): - return - - op_stack_list = event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) - for op_stack in op_stack_list: - for op_rule, stack in op_stack.items(): - if op_rule not in self.matched_op_stacks: - self.matched_op_stacks[op_rule] = {} - if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: - continue - if stack not in self.matched_op_stacks[op_rule]: - self.matched_op_stacks[op_rule][stack] = 0 - self.matched_op_stacks[op_rule][stack] += 1 - - def _query_stack_by_matched_index(self, index, event): - stack_record = {} - event = TimelineEvent(event) - - matched_op_rules = 
[] - for op_rule, matched_index in self._matched_op_index.items(): - if index not in matched_index: - continue - - matched_op_rules.append(op_rule) - stack = event.args.get(const.CALL_STACKS) - - if not stack: - logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) - continue - - if self.empty_stacks and stack: - self.empty_stacks = False - - stack_record[op_rule] = stack - - if matched_op_rules and not stack_record: - for op_rule in matched_op_rules: - stack_record[op_rule] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG - - return stack_record - - def _replace_op_name_prefix(self, event_name, mode): - if mode == const.DEQUEUE.lower(): - op_name_prefix = f"{const.DEQUEUE}{const.DEQUEUE_SEP}" - elif mode == const.ATEN: - op_name_prefix = f"{const.ATEN}{const.ATEN_SEP}" - else: - op_name_prefix = f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}" - - return event_name.replace(op_name_prefix, "") - - def _format_rule_to_pattern(self, op_rule): - """ - Args: - op_rule: like (mul){0,1}-(add|neg){0,2}-dropout-(softmax)* - - Returns: op_pattern like (-mul-){0,1}(-add-|-neg-){0,2}(-dropout-)(-softmax-)* - """ - enable_regex = False - if "(" not in op_rule and ")" not in op_rule: - # op_rule which requires fuzzy matching mush consist of "()" - return op_rule, enable_regex - - enable_regex = True - op_pattern_list = op_rule.split(const.OP_SEP) - format_op_pattern = "" - for op_pattern in op_pattern_list: - matched_res = re.search(r'\((.*?)\)', op_pattern) - - ops_index_range = (matched_res.start() + 1, matched_res.end() - 1) if matched_res else ( - 0, len(op_pattern)) - - op_names = op_pattern[ops_index_range[0]: ops_index_range[1]] - tmp_op_names_record = [] - for op_name in op_names.split("|"): - tmp_op_names_record.append(f"{const.OP_SEP}{op_name.strip(' ')}{const.OP_SEP}") - op_suffix = op_pattern[ops_index_range[1] + 1:] - op_names_format = f"({'|'.join(tmp_op_names_record)}){op_suffix}" - - format_op_pattern += op_names_format - return 
format_op_pattern, enable_regex +import multiprocessing +import logging +import re + +from tqdm import tqdm + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.utils.utils import format_timeline_result +from profiler.advisor.common.timeline.fusion_ops_db import init_timeline_ops_db + +logger = logging.getLogger() + + +class TimelineFusionOpsAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, + torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() + self.matched_op_stacks = {} + self.empty_stacks = True + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + def optimize(self, **kwargs): + for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]: + + for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version), + f"_{mode}_op_api_map").items(), leave=False, ncols=100, + desc="Scanning timeline for affinity apis"): + for npu_api in npu_apis.split("/"): + self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode) + + self.query_stack(self.timeline_event_dataset) + + logger.info("Finish timeline analysis") + self.make_record() + self.make_render() + return self.result + + def find_fusion_ops(self, event_dataset, ops: str, npu_api: str, mode: str): + """ + :Param event_dataset: dataset of timeline event + :Param ops: operator combination 
with '-' as separator , e.g. permute-reshape + :Param npu_api: api of torch_npu, generally more efficient than torch api + :Param mode: aten or dequeue or optimizer + :Return: json of op_name and called times and detail stacks + """ + op_rule_pattern, enable_regex = self._format_rule_to_pattern(ops) + if not enable_regex: + self._match_ops(event_dataset, op_rule_pattern, npu_api, mode) + else: + try: + self._match_ops_with_regex(event_dataset, op_rule_pattern, npu_api, mode) + except Exception as e: + logger.warning("Failed to find fusion operators with regex %s, reason is %s", ops, e) + + def _match_ops(self, event_dataset, ops: str, npu_api: str, mode: str): + """ match operator based on fusion operators rule(without regex), + only strictly equals of op name list means matched + :Param event_dataset: dataset of timeline event + :Param ops: operator combination with '-' as separator , e.g. permute-reshape + :Param npu_api: api of torch_npu, generally more efficient than torch api + :Param mode: aten or dequeue or optimizer + """ + op_list = ops.split(const.OP_SEP) + + matched_op_index = set() + api_ops_matched = False + + for index, event in enumerate(getattr(event_dataset, mode)): + if self._replace_op_name_prefix(event.name, mode) != op_list[0]: + continue + tmp_dequeue_event_names = [self._replace_op_name_prefix(event.name, mode) for event in + getattr(event_dataset, mode)[index: index + len(op_list)]] + if tmp_dequeue_event_names != op_list: + continue + api_ops_matched = True + matched_op_index.add(event.dataset_index) + + if api_ops_matched: + self._matched_op_index[npu_api + f":{ops}"] = matched_op_index + + def _match_ops_with_regex(self, event_dataset, op_rule_pattern: str, npu_api: str, + mode: str): + """ match operator based on fusion operators rule(with regex), + using regex to support condition like 'a = torch.mul(xxx) if xxx else torch.add(xxx)' + :Param event_dataset: dataset of timeline event + :Param op_rule_pattern: fusion operators rule with 
regex definition , e.g. add-mul{0,10}, add-mul* + :Param npu_api: api of torch_npu, generally more efficient than torch api + :Param mode: aten or dequeue or optimizer + """ + matched_op_index = set() + total_op_name = "".join([f"{const.OP_SEP}{self._replace_op_name_prefix(event.name, mode)}{const.OP_SEP}" + for event in + getattr(event_dataset, mode)]) + + matched_pattern_index_tuple = [(x.start(0), x.end(0)) for x in re.finditer(op_rule_pattern, total_op_name)] + # convert list of index tuple to a whole list: [(3, 25), ...] -> [3, 25, ...] + total_ops_split_points = [num for sublist in matched_pattern_index_tuple for num in sublist] + + api_ops_matched = len(total_ops_split_points) != 0 + + op_index = [] + if 0 not in total_ops_split_points: + total_ops_split_points = [0] + total_ops_split_points + if len(list(total_op_name)) not in total_ops_split_points: + total_ops_split_points.append(len(list(total_op_name))) + + # convert total ops name like "-add-mul-xxx-div-" to small pieces like [["add", "mul"], [...], ["div"]] + # by the regex index and then calculate the real index for matched fusion operators in event dataset + for l, r in zip(total_ops_split_points, total_ops_split_points[1:]): + matched_op_flag = True if (l, r) in matched_pattern_index_tuple else False + matched_ops_list = total_op_name[l: r].strip(const.OP_SEP).split(const.OP_SEP + const.OP_SEP) + op_index.append([matched_op_flag, len(matched_ops_list)]) + for i, _ in enumerate(op_index): + if i > 0: + # calculate cumsum for indexing matched operator + op_index[i][1] = op_index[i][1] + op_index[i - 1][1] + op_index = [[False, 0]] + op_index + + for i, _ in enumerate(op_index): + if not op_index[i][0]: + continue + index = op_index[i - 1][1] + matched_op_index.add(index) + + if index > len(getattr(event_dataset, mode)) - 1: + continue + dataset_index = getattr(event_dataset, mode)[index].get("dataset_index") + matched_op_index.add(dataset_index) + + if api_ops_matched: + 
self._matched_op_index[npu_api + f":{op_rule_pattern}"] = sorted(list(matched_op_index)) + + def make_record(self): + """ + make record for what and how to optimize + """ + if not self.matched_op_stacks: + return + + desc = f"Found {len(format_timeline_result(self.matched_op_stacks))} apis to be replaced" \ + f" based on the runtime env cann-{self.cann_version} and torch-{self.torch_version}" + suggestion = "Please replace training api according to sub table 'Affinity training api'" + if self.empty_stacks: + desc += ", but with no stack" + suggestion = const.TIMELINE_EMPTY_STACKS_PROMPT.format( + timeline_profiling_doc_url=const.TIMELINE_WITH_STACK_DOC_URL + ) + + optimization_item = OptimizeItem( + const.AFFINITY_TRAINING_API, + desc, + [suggestion] + ) + + self.result.add(OptimizeRecord(optimization_item)) + + record_title = ["Affinity API", "Code stacks", "Stack called counts"] + self.result.add_detail(const.AFFINITY_TRAINING_API, headers=record_title) + + for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items(): + if not stacks_info: + detail = [api_name, "null", "null"] + self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) + else: + for stack in stacks_info: + detail = [api_name, *stack] + self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) + + def make_render(self): + format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True) + + self.html_render.render_template(key="schedule", + template_dir="templates", + template_name="affinity_api.html", + cann_version=self.cann_version, + torch_version=self.torch_version, + empty_stacks=self.empty_stacks, + with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, + api_doc_url=const.TIMELINE_API_DOC_URL, + result=format_result_for_html) + + def query_stack(self, event_dataset): + if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): + return + + op_stack_list = 
event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) + for op_stack in op_stack_list: + for op_rule, stack in op_stack.items(): + if op_rule not in self.matched_op_stacks: + self.matched_op_stacks[op_rule] = {} + if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: + continue + if stack not in self.matched_op_stacks[op_rule]: + self.matched_op_stacks[op_rule][stack] = 0 + self.matched_op_stacks[op_rule][stack] += 1 + + def _query_stack_by_matched_index(self, index, event): + stack_record = {} + event = TimelineEvent(event) + + matched_op_rules = [] + for op_rule, matched_index in self._matched_op_index.items(): + if index not in matched_index: + continue + + matched_op_rules.append(op_rule) + stack = event.args.get(const.CALL_STACKS) + + if not stack: + logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) + continue + + if self.empty_stacks and stack: + self.empty_stacks = False + + stack_record[op_rule] = stack + + if matched_op_rules and not stack_record: + for op_rule in matched_op_rules: + stack_record[op_rule] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG + + return stack_record + + def _replace_op_name_prefix(self, event_name, mode): + if mode == const.DEQUEUE.lower(): + op_name_prefix = f"{const.DEQUEUE}{const.DEQUEUE_SEP}" + elif mode == const.ATEN: + op_name_prefix = f"{const.ATEN}{const.ATEN_SEP}" + else: + op_name_prefix = f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}" + + return event_name.replace(op_name_prefix, "") + + def _format_rule_to_pattern(self, op_rule): + """ + Args: + op_rule: like (mul){0,1}-(add|neg){0,2}-dropout-(softmax)* + + Returns: op_pattern like (-mul-){0,1}(-add-|-neg-){0,2}(-dropout-)(-softmax-)* + """ + enable_regex = False + if "(" not in op_rule and ")" not in op_rule: + # op_rule which requires fuzzy matching mush consist of "()" + return op_rule, enable_regex + + enable_regex = True + op_pattern_list = op_rule.split(const.OP_SEP) + format_op_pattern = "" + for op_pattern 
in op_pattern_list: + matched_res = re.search(r'\((.*?)\)', op_pattern) + + ops_index_range = (matched_res.start() + 1, matched_res.end() - 1) if matched_res else ( + 0, len(op_pattern)) + + op_names = op_pattern[ops_index_range[0]: ops_index_range[1]] + tmp_op_names_record = [] + for op_name in op_names.split("|"): + tmp_op_names_record.append(f"{const.OP_SEP}{op_name.strip(' ')}{const.OP_SEP}") + op_suffix = op_pattern[ops_index_range[1] + 1:] + op_names_format = f"({'|'.join(tmp_op_names_record)}){op_suffix}" + + format_op_pattern += op_names_format + return format_op_pattern, enable_regex diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index df12fd76d3..664753c724 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -56,8 +56,8 @@ CANN_VERSION_C13 = '7.0.RC1' CANN_VERSION_C15 = '7.0.0' CANN_VERSION_C17 = '8.0.0' SUPPORTED_CANN_VERSION = [CANN_VERSION_C30, CANN_VERSION_C13, CANN_VERSION_C15, CANN_VERSION_C17] -DEFAULT_CANN_VERSION = CANN_VERSION_C15 -ASCEND_PYTORCH_PROFILER = "ascend_pytorch_proflier" +DEFAULT_CANN_VERSION = CANN_VERSION_C17 +ASCEND_PYTORCH_PROFILER = "ascend_pytorch_profiler" MSLITE = "mslite" MSPROF = "msprof" SUPPORTED_PROFILING_TYPE = [ASCEND_PYTORCH_PROFILER, MSLITE, MSPROF] @@ -83,12 +83,20 @@ ADVISOR_LOG_LEVEL = "ADVISOR_LOG_LEVEL" DEFAULT_LOG_LEVEL = "INFO" SUPPORTED_LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] +RULE_BUCKET = "RULE-BUCKET" CLOUD_RULE_REGION_CN_NORTH_9 = "cn-north-9" CLOUD_RULE_REGION_CN_NORTH_7 = "cn-north-7" CLOUD_RULE_REGION_CN_SOUTHWEST_2 = "cn-southwest-2" CLOUD_RULE_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7, CLOUD_RULE_REGION_CN_NORTH_9, CLOUD_RULE_REGION_CN_SOUTHWEST_2] +INNER_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7] DEFAULT_CLOUD_RULE_REGION = CLOUD_RULE_REGION_CN_SOUTHWEST_2 +HTTP_PREFIXES = "http://" +HTTPS_PREFIXES = "https://" +COMMON_YAML_DIR = "modelarts/solution/ma_advisor_rules/" +COMMON_ENDPOINT_SUFFIX = 
"obs.{}.myhuaweicloud.com" +INNER_ENDPOINT_SUFFIX= "obs.{}.ulanqab.huawei.com" + AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" FUSSION_PASS_YAML_NAME = "op_fussion_pass.yaml" TIMELINE_FUSION_OPS_YAML_NAME = "timeline_fusion_ops.yaml" diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py index 19a86437e0..f37cfe50d1 100644 --- a/profiler/advisor/common/timeline/fusion_ops_db.py +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -1,330 +1,44 @@ -import copy import logging import os import yaml -from profiler.advisor.common import constant as const +from profiler.advisor.common import constant +from profiler.advisor.common.timeline.fusion_ops_rule import OpRule +from profiler.advisor.common.timeline.fusion_ops_rule_handler import TimelineOpRuleHandler from profiler.advisor.utils.log import get_log_level +from profiler.advisor.utils.utils import get_file_path_by_walk logger = logging.getLogger() logger.setLevel(get_log_level()) -class TimelineOpRuleHandler: - """基于线性规划思想保存OpRule,用于局部继承、全局继承等功能""" - - def __init__(self): - self._db_content = None - # 具体生成的timeline规则,key为unique_id - self._all_tmp_timeline_op_rule = {} - # 所有timeline规则的dict集合,key为unique_id - self._all_origin_timeline_op_rule_dict = {} - # 已生成timeline规则的id数组 - self._exist_timeline_op_rule_unique_id_list = [] - - @staticmethod - def _get_local_inherit_id_list(op_rule: dict): - local_inherit_id_list = [] - for _, val in op_rule.items(): - if val.get("inherit_unique_id") is not None: - local_inherit_id_list.append(val.get("inherit_unique_id")) - return local_inherit_id_list - - @staticmethod - def _is_duplicated_element_in_lists(list_a, list_b): - """检查两个数组中是否存在重复的元素,若有任意元素重复,返回True""" - if not isinstance(list_a, list): - list_a = [list_a] - if not isinstance(list_b, list): - list_b = [list_b] - for element in list_a: - if element in list_b: - return True - return False - - def set_db_content(self, db_content): - # 过滤非 dict 格式, 或 dict 中没有定义 
unique_id 的数据, 并保存到 _all_origin_timeline_op_rule_dict 中 - self._db_content = copy.deepcopy(db_content) - for rule_dic in self._db_content: - if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: - continue - self._all_origin_timeline_op_rule_dict[rule_dic.get("unique_id")] = rule_dic - if self._all_origin_timeline_op_rule_dict: - self.generate_all_timeline_op_rule() - - def generate_basic_timeline_op_rules(self): - """用于实现获取无全局继承规则, 无全局继承的规则认为是基础版本规则, 默认不会存在局部继承""" - for _, rule_dic in self._all_origin_timeline_op_rule_dict.items(): - if rule_dic.get("inherit_unique_id") is None: - self.add_basic_timeline_op_rule(rule_dic) - - def add_basic_timeline_op_rule(self, rule_dic): - # 若基础规则中存在局部继承的规则,则跳过 - local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) - if local_inherit_id_list: - return - - temp_rule = OpRule() - temp_rule.merge(rule_dic.get("operator_rules")) - - unique_id = rule_dic.get("unique_id") - logger.debug("The rule of version %s is basic rule.", unique_id) - self.add_new_timeline_op_rule(unique_id, temp_rule.tmp_rule) - - def add_empty_timeline_op_rule(self, unique_id): - if self._all_origin_timeline_op_rule_dict.get(unique_id) is None: - self._all_origin_timeline_op_rule_dict[unique_id] = {} - tmp_rule = {} - logger.debug("The rule of version %s is empty.", unique_id) - self.add_new_timeline_op_rule(unique_id, tmp_rule) - - def add_new_timeline_op_rule(self, unique_id, tmp_rule): - if unique_id not in self._exist_timeline_op_rule_unique_id_list: - self._exist_timeline_op_rule_unique_id_list.append(unique_id) - self._all_tmp_timeline_op_rule[unique_id] = tmp_rule - logger.debug("The rule of version %s is successfully generated.", unique_id) - - def generate_specified_list_timeline_op_rule(self, specified_unique_id_list, kid_id_list=None): - for specified_unique_id in specified_unique_id_list: - if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: - 
self.generate_specified_timeline_op_rule(specified_unique_id, kid_id_list) - - def generate_specified_timeline_op_rule(self, specified_unique_id, kid_id_list=None): - """用于实现生成特定版本规则 - - 若不存在相应specified_unique_id的规则、或是已生成、循环继承等情况,将该规则置空并返回 - 规则库文件结构设置为多叉树, 结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成, - 直到specified_unique_id规则依赖继承的规则库全部生成完毕, 再生成该指定规则库, 将specified_unique_id的规则库归档 - - 参数: - specified_unique_id: 指定版本规则id - kid_id_list: 子规则id数组, 用于防止循环继承, 如间接继承自身或直接继承自身等情况 - 返回: - None - """ - if kid_id_list is None: - kid_id_list = [] - - # 若该unique_id规则在timeline_fusion_ops.yaml中没有相应的规则, 生成该id规则,置为空 - if self._all_origin_timeline_op_rule_dict.get(specified_unique_id) is None: - logger.warning("The specified version %s does not exist in the rule library. " - "Ensure that the corresponding rule is configured in the YAML file. " - "The version %s is left blank.", - specified_unique_id, - specified_unique_id) - self.add_empty_timeline_op_rule(specified_unique_id) - return - - # 若该unique_id规则已经生成,则无需再次生成 - if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: - logger.warning("The rule has been generated and does not need to be generated again. " - "Check whether unique id %s in the YAML file is duplicate.", - specified_unique_id) - return - - # 若kid_id_list不为空,且间接继承自身,则尝试生成空规则用于继承 - if kid_id_list and self._is_duplicated_element_in_lists(specified_unique_id, kid_id_list): - logger.warning("It cannot be inherited indirectly. 
Ensure that the corresponding rules are correctly " - "configured in the YAML file and leave Version %s blank.", - specified_unique_id) - self.add_empty_timeline_op_rule(specified_unique_id) - return - - rule_dic = self._all_origin_timeline_op_rule_dict.get(specified_unique_id) - if rule_dic is not None: - kid_id_list.append(specified_unique_id) - - global_inherit_id = rule_dic.get("inherit_unique_id") - if global_inherit_id and global_inherit_id not in self._exist_timeline_op_rule_unique_id_list: - logger.debug("The rule of version %s global inherit the rule of version %s", - specified_unique_id, global_inherit_id) - self.generate_specified_timeline_op_rule(global_inherit_id, kid_id_list) - - # 若局部继承的规则未生成, 生成该规则 - local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) - if local_inherit_id_list: - logger.debug("The rule of version %s local inherit the rule of version %s", - specified_unique_id, local_inherit_id_list) - self.generate_specified_list_timeline_op_rule(specified_unique_id_list=local_inherit_id_list, - kid_id_list=kid_id_list) - logger.debug("Start to generate rule of version %s", specified_unique_id) - # 实现全局继承与局部继承 - temp_rule = OpRule(timeline_op_rule_handler=self, - rule=self._all_tmp_timeline_op_rule.get(global_inherit_id)) - temp_rule.merge(rule_dic.get("operator_rules")) - # 将生成的规则归档保存 - self.add_new_timeline_op_rule(specified_unique_id, temp_rule.tmp_rule) - return - logger.error("Failed to generate the rule whose unique_id is %s. 
Ensure that the rule is configured in " - "the YAML file and the version %s is empty.", specified_unique_id, specified_unique_id) - self.add_empty_timeline_op_rule(specified_unique_id) - - def generate_all_timeline_op_rule(self): - """用于实现获取所有版本规则 - - 查找db_content中的规则库, 规则库文件结构设置为多叉树, 优先生成无继承的基础规则版本 - 循环并生成其他版本, 文件结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成,直到全部规则库生成后退出函数 - - 参数: - None - 返回: - None - """ - self.generate_basic_timeline_op_rules() - _unique_id_list = copy.deepcopy(list(self._all_origin_timeline_op_rule_dict.keys())) - for unique_id in _unique_id_list: - if unique_id in self._exist_timeline_op_rule_unique_id_list: - continue - self.generate_specified_timeline_op_rule(unique_id) - - def get_tmp_timeline_op_rule_with_unique_id(self, unique_id): - if unique_id not in self._exist_timeline_op_rule_unique_id_list: - logger.error("The specified unique_id does not exist in the rule library. Ensure that the " - "corresponding rule is configured in the YAML file and the version %s is empty." - "If the value of unique_id is a negative number, the version may not be supported.", - unique_id) - self.add_empty_timeline_op_rule(unique_id) - if unique_id < 0: - logger.error("Advise to use a positive integer as the unique id of rules. " - "Negative numbers: %s are not recommended to use as unique id. 
" - "If specified invalid unique id: %s is used, an empty rule is returned by default.", - unique_id, const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) - return self._all_tmp_timeline_op_rule.get(unique_id) - - -class OpRule: - - def __init__(self, rule=None, timeline_op_rule_handler=None): - if rule is None: - self._tmp_rule = {} - else: - self._tmp_rule = copy.deepcopy(rule) - if timeline_op_rule_handler is None: - self.timeline_op_rule_handler = {} - else: - self.timeline_op_rule_handler = copy.deepcopy(timeline_op_rule_handler) - self._rule = {} - - @property - def tmp_rule(self): - return self._tmp_rule - - @staticmethod - def _format_rule(rule): - """格式化规则函数, 将额外规则格式化为{key,数组list}形式, 使得yaml文件中operator_rules若写成key:str形式也能正常读取""" - format_rule = {} - for key, val in rule.items(): - if not isinstance(val, list): - val = [val] - format_rule[key] = val - return format_rule - - def merge(self, extra_rule): - """合并函数, 将已有规则库与额外规则合并, 若无继承则已有规则库应为空""" - for key, val in extra_rule.items(): - for func, op_rules in val.items(): - try: - getattr(self, f"{func}")(key, op_rules) - except AttributeError: - logger.error("Undefined field and function name. 
Ensure that %s is correct in the rule " - "library.", func) - - def get_final_rules(self): - """获取最终的规则库""" - self._restore_rule() - return self._rule - - def add(self, key, add_rules: dict): - """新增函数, 新增已有规则库不存在的额外规则""" - if add_rules is None: - return - if self._tmp_rule.get(key) is None: - self._tmp_rule[key] = {} - format_add_rule = self._format_rule(add_rules) - for add_key, add_val in format_add_rule.items(): - logger.debug("add: %s: %s", add_key, add_val) - if add_key not in self._tmp_rule: - self._tmp_rule[key][add_key] = add_val - else: - logger.warning("This key has been written to the rule, " - "%s: %s should be written in the overwrite section", add_key, add_val) - self._tmp_rule[key][add_key].update(add_val) - - def overwrite(self, key, overwrite_rules: dict): - """重写函数, 重写已有规则库中已经存在的规则""" - if overwrite_rules is None: - return - if self._tmp_rule.get(key) is None: - self._tmp_rule[key] = {} - format_overwrite_rules = self._format_rule(overwrite_rules) - for overwrite_key, overwrite_val in format_overwrite_rules.items(): - logger.debug("overwrite: %s: %s", overwrite_key, overwrite_val) - if overwrite_key not in self._tmp_rule: - logger.warning("This key is not written to the rule. " - "%s: %s should be written in the add section", overwrite_key, overwrite_val) - self._tmp_rule[key][overwrite_key] = overwrite_val - else: - self._tmp_rule[key][overwrite_key].update(overwrite_val) - - def exclude(self, key, exclude_rules: list): - """除外函数, 将已有规则库已有的规则除外删除""" - if exclude_rules is None: - return - for exclude_key in exclude_rules: - logger.debug("exclude: %s", exclude_key) - if isinstance(exclude_key, str): - if exclude_key not in self._tmp_rule[key]: - logger.warning("This key is not written to the rule. 
" - "do not need to exclude: %s.", exclude_key) - continue - self._tmp_rule[key].pop(exclude_key) - else: - logger.warning("Error type rule in exclude: %s", exclude_key) - - def inherit_unique_id(self, key, inherit_unique_id): - """局部继承函数, 将规则库中指定unique_id版本覆盖指定位置""" - result_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(inherit_unique_id) - if result_rule is not None and result_rule.get(key) is not None: - self._tmp_rule[key] = copy.deepcopy(result_rule.get(key)) - return - logger.error("Rule library version %s does not exist. ", inherit_unique_id) - - def _restore_rule(self): - for key, op_api_map in self._tmp_rule.items(): - self._rule[key] = [{op_combined: api} for op_combined, api in op_api_map.items()] - +def init_timeline_ops_db(cann_version=None, torch_version=None): + logger.debug("init operators database") -def get_file_path_by_walk(root, filename): - file_path = "" - for root, _, files in os.walk(root, topdown=True): - for name in files: - if name == filename: - file_path = os.path.join(root, name) - return file_path - return file_path + return FusionOperatorDB(cann_version=cann_version, torch_version=torch_version) def get_timeline_fusion_ops_yaml_path(): # 环境变量 ADVISOR_RULE_PATH 不为空且该路径存在, os.walk遍历其下文件, 若存在相应的规则文件则返回路径 - advisor_rule_path = os.getenv(const.ADVISOR_RULE_PATH) + advisor_rule_path = os.getenv(constant.ADVISOR_RULE_PATH) if advisor_rule_path and os.path.exists(advisor_rule_path): - specified_file_path = get_file_path_by_walk(advisor_rule_path, const.TIMELINE_FUSION_OPS_YAML_NAME) + specified_file_path = get_file_path_by_walk(advisor_rule_path, constant.TIMELINE_FUSION_OPS_YAML_NAME) if len(specified_file_path.strip()) and os.path.exists(specified_file_path): logger.debug("Successfully find The %s file which is specified by the environment variable: %s.", - specified_file_path, const.ADVISOR_RULE_PATH) + specified_file_path, constant.ADVISOR_RULE_PATH) return specified_file_path logger.warning("The %s does 
not exist in path: %s. Try to use cloud or default local YAML file.", - const.TIMELINE_FUSION_OPS_YAML_NAME, os.path.normpath(advisor_rule_path)) + constant.TIMELINE_FUSION_OPS_YAML_NAME, os.path.normpath(advisor_rule_path)) # 检查云文件默认保存路径文件夹下是否存在相应文件, 默认路径 ~/rules/cloud/ - cloud_file_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH, const.TIMELINE_FUSION_OPS_YAML_NAME) + cloud_file_path = os.path.join(os.path.expanduser("~"), constant.CLOUD_RULE_PATH, constant.TIMELINE_FUSION_OPS_YAML_NAME) if os.path.exists(cloud_file_path): - logger.debug("Successfully find The cloud %s file in %s.", const.TIMELINE_FUSION_OPS_YAML_NAME, + logger.debug("Successfully find The cloud %s file in %s.", constant.TIMELINE_FUSION_OPS_YAML_NAME, cloud_file_path) return cloud_file_path # 检查本地默认文件 local_file_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), - const.DEFAULT_RULE_PATH, const.TIMELINE_FUSION_OPS_YAML_NAME) + constant.DEFAULT_RULE_PATH, constant.TIMELINE_FUSION_OPS_YAML_NAME) if not os.path.exists(local_file_path): # 若本地默认文件不存在, 则log异常信息并 logger.error("The default local YAML file does not exist. 
Please check the YAML file in the default path %s.", @@ -337,8 +51,8 @@ class FusionOperatorDB: def __init__(self, file_path=None, cann_version=None, torch_version=None): self.timeline_fusion_ops_yaml_path = os.path.normpath(get_timeline_fusion_ops_yaml_path()) - self.cann_version = cann_version or const.DEFAULT_CANN_VERSION - self.torch_version = torch_version or const.DEFAULT_TORCH_VERSION + self.cann_version = cann_version or constant.DEFAULT_CANN_VERSION + self.torch_version = torch_version or constant.DEFAULT_TORCH_VERSION self._supported_version_dict = {} @@ -379,9 +93,9 @@ class FusionOperatorDB: return self._optimizer_op_api_map def get_fusion_operator_with_unique_id(self, unique_id): - if unique_id == const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID: + if unique_id == constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID: logger.warning("The specified unique id: %s is invalid.Please check whether the rule of the unique id " - "exists and modify the rule.", const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + "exists and modify the rule.", constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) return {} result_tmp_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(unique_id) result_op_rule = OpRule(result_tmp_rule) @@ -411,7 +125,7 @@ class FusionOperatorDB: def _is_version_supported(self, db_content): """校验当前版本是否被规则库中的版本支持, 保存版本支持信息数组, 按数组或字符串的可变方式保存""" - if db_content is None : + if db_content is None: logger.warning( "The rule library is empty. 
Check the rule library file: %s", self.timeline_fusion_ops_yaml_path @@ -455,18 +169,18 @@ class FusionOperatorDB: def _is_version_supported_in_supported_version_dict(self, cann_version=None, torch_version=None): """校验当前版本是否存在在规则库中的版本支持字典中""" for _, supported_version in self._supported_version_dict.items(): - if self._is_version_supported_in_version(supported_version, cann_version, torch_version): + if self._is_version_supported_in_versions(supported_version, cann_version, torch_version): return True return False def _get_unique_id_in_supported_version_dict(self, cann_version=None, torch_version=None) -> int: """校验当前版本是否存在在规则库中的版本支持字典中, 在使用前请检查是否支持该版本""" for key_unique_id, supported_version in self._supported_version_dict.items(): - if self._is_version_supported_in_version(supported_version, cann_version, torch_version): + if self._is_version_supported_in_versions(supported_version, cann_version, torch_version): return key_unique_id - return const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID + return constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID - def _is_version_supported_in_version(self, supported_version, cann_version=None, torch_version=None): + def _is_version_supported_in_versions(self, supported_version, cann_version=None, torch_version=None): """校验当前cann版本和torch版本是否存在在规则库中的版本支持数组的元素中""" cann_version_list = supported_version[0] if not isinstance(cann_version_list, list): @@ -485,9 +199,9 @@ class FusionOperatorDB: def _parse_db(self): """生成输出的规则库""" - self._parse(const.ATEN) - self._parse(const.DEQUEUE) - self._parse(const.OPTIMIZER) + self._parse(constant.ATEN) + self._parse(constant.DEQUEUE) + self._parse(constant.OPTIMIZER) def _parse(self, mode): """生成输出的规则库中指定部分, 如aten, Optimizer等""" @@ -521,7 +235,7 @@ class FusionOperatorDB: if not os.path.exists(file_path): logger.warning("Path: '%s' does not exist, please specific existed path of " "fusion operators yaml file by setting env '%s'", - os.path.abspath(file_path), const.ADVISOR_RULE_PATH) + 
os.path.abspath(file_path), constant.ADVISOR_RULE_PATH) self.is_empty = True return {} diff --git a/profiler/advisor/common/timeline/fusion_ops_rule.py b/profiler/advisor/common/timeline/fusion_ops_rule.py new file mode 100644 index 0000000000..deee68edb9 --- /dev/null +++ b/profiler/advisor/common/timeline/fusion_ops_rule.py @@ -0,0 +1,110 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. +import copy +import logging + +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class OpRule: + + def __init__(self, rule=None, timeline_op_rule_handler=None): + if rule is None: + self._tmp_rule = {} + else: + self._tmp_rule = copy.deepcopy(rule) + if timeline_op_rule_handler is None: + self.timeline_op_rule_handler = {} + else: + self.timeline_op_rule_handler = copy.deepcopy(timeline_op_rule_handler) + self._rule = {} + + @property + def tmp_rule(self): + return self._tmp_rule + + @staticmethod + def _format_rule(rule): + """格式化规则函数, 将额外规则格式化为{key,数组list}形式, 使得yaml文件中operator_rules若写成key:str形式也能正常读取""" + format_rule = {} + for key, val in rule.items(): + if not isinstance(val, list): + val = [val] + format_rule[key] = val + return format_rule + + def merge(self, extra_rule): + """合并函数, 将已有规则库与额外规则合并, 若无继承则已有规则库应为空""" + for key, val in extra_rule.items(): + for func, op_rules in val.items(): + try: + getattr(self, f"{func}")(key, op_rules) + except AttributeError: + logger.error("Undefined field and function name. 
Ensure that %s is correct in the rule " + "library.", func) + + def get_final_rules(self): + """获取最终的规则库""" + self._restore_rule() + return self._rule + + def add(self, key, add_rules: dict): + """新增函数, 新增已有规则库不存在的额外规则""" + if add_rules is None: + return + if self._tmp_rule.get(key) is None: + self._tmp_rule[key] = {} + format_add_rule = self._format_rule(add_rules) + for add_key, add_val in format_add_rule.items(): + logger.debug("add: %s: %s", add_key, add_val) + if add_key not in self._tmp_rule: + self._tmp_rule[key][add_key] = add_val + else: + logger.warning("This key has been written to the rule, " + "%s: %s should be written in the overwrite section", add_key, add_val) + self._tmp_rule[key][add_key].update(add_val) + + def overwrite(self, key, overwrite_rules: dict): + """重写函数, 重写已有规则库中已经存在的规则""" + if overwrite_rules is None: + return + if self._tmp_rule.get(key) is None: + self._tmp_rule[key] = {} + format_overwrite_rules = self._format_rule(overwrite_rules) + for overwrite_key, overwrite_val in format_overwrite_rules.items(): + logger.debug("overwrite: %s: %s", overwrite_key, overwrite_val) + if overwrite_key not in self._tmp_rule: + logger.warning("This key is not written to the rule. " + "%s: %s should be written in the add section", overwrite_key, overwrite_val) + self._tmp_rule[key][overwrite_key] = overwrite_val + else: + self._tmp_rule[key][overwrite_key].update(overwrite_val) + + def exclude(self, key, exclude_rules: list): + """除外函数, 将已有规则库已有的规则除外删除""" + if exclude_rules is None: + return + for exclude_key in exclude_rules: + logger.debug("exclude: %s", exclude_key) + if isinstance(exclude_key, str): + if exclude_key not in self._tmp_rule[key]: + logger.warning("This key is not written to the rule. 
" + "do not need to exclude: %s.", exclude_key) + continue + self._tmp_rule[key].pop(exclude_key) + else: + logger.warning("Error type rule in exclude: %s", exclude_key) + + def inherit_unique_id(self, key, inherit_unique_id): + """局部继承函数, 将规则库中指定unique_id版本覆盖指定位置""" + result_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(inherit_unique_id) + if result_rule is not None and result_rule.get(key) is not None: + self._tmp_rule[key] = copy.deepcopy(result_rule.get(key)) + return + logger.error("Rule library version %s does not exist. ", inherit_unique_id) + + def _restore_rule(self): + for key, op_api_map in self._tmp_rule.items(): + self._rule[key] = [{op_combined: api} for op_combined, api in op_api_map.items()] diff --git a/profiler/advisor/common/timeline/fusion_ops_rule_handler.py b/profiler/advisor/common/timeline/fusion_ops_rule_handler.py new file mode 100644 index 0000000000..b0558cca6d --- /dev/null +++ b/profiler/advisor/common/timeline/fusion_ops_rule_handler.py @@ -0,0 +1,193 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. 
+import copy +import logging + +from profiler.advisor.common import constant +from profiler.advisor.common.timeline.fusion_ops_rule import OpRule +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class TimelineOpRuleHandler: + """基于线性规划思想保存OpRule,用于局部继承、全局继承等功能""" + + def __init__(self): + self._db_content = None + # 具体生成的timeline规则,key为unique_id + self._all_tmp_timeline_op_rule = {} + # 所有timeline规则的dict集合,key为unique_id + self._all_origin_timeline_op_rule_dict = {} + # 已生成timeline规则的id数组 + self._exist_timeline_op_rule_unique_id_list = [] + + @staticmethod + def _get_local_inherit_id_list(op_rule: dict): + local_inherit_id_list = [] + for _, val in op_rule.items(): + if val.get("inherit_unique_id") is not None: + local_inherit_id_list.append(val.get("inherit_unique_id")) + return local_inherit_id_list + + @staticmethod + def _is_duplicated_element_in_lists(list_a, list_b): + """检查两个数组中是否存在重复的元素,若有任意元素重复,返回True""" + if not isinstance(list_a, list): + list_a = [list_a] + if not isinstance(list_b, list): + list_b = [list_b] + # 将两个数组合并为一个列表,使用集合(set)判断列表中是否存在重复元素 + combined_list = list_a + list_b + if len(combined_list) != len(set(combined_list)): + return True + return False + + def set_db_content(self, db_content): + # 过滤非 dict 格式, 或 dict 中没有定义 unique_id 的数据, 并保存到 _all_origin_timeline_op_rule_dict 中 + self._db_content = copy.deepcopy(db_content) + for rule_dic in self._db_content: + if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: + continue + self._all_origin_timeline_op_rule_dict[rule_dic.get("unique_id")] = rule_dic + if self._all_origin_timeline_op_rule_dict: + self.generate_all_timeline_op_rule() + + def generate_basic_timeline_op_rules(self): + """用于实现获取无全局继承规则, 无全局继承的规则认为是基础版本规则, 默认不会存在局部继承""" + for _, rule_dic in self._all_origin_timeline_op_rule_dict.items(): + if rule_dic.get("inherit_unique_id") is None: + self.add_basic_timeline_op_rule(rule_dic) + + def 
add_basic_timeline_op_rule(self, rule_dic): + # 若基础规则中存在局部继承的规则,则跳过 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + return + + temp_rule = OpRule() + temp_rule.merge(rule_dic.get("operator_rules")) + + unique_id = rule_dic.get("unique_id") + logger.debug("The rule of version %s is basic rule.", unique_id) + self.add_new_timeline_op_rule(unique_id, temp_rule.tmp_rule) + + def add_empty_timeline_op_rule(self, unique_id): + if self._all_origin_timeline_op_rule_dict.get(unique_id) is None: + self._all_origin_timeline_op_rule_dict[unique_id] = {} + tmp_rule = {} + logger.debug("The rule of version %s is empty.", unique_id) + self.add_new_timeline_op_rule(unique_id, tmp_rule) + + def add_new_timeline_op_rule(self, unique_id, tmp_rule): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + self._exist_timeline_op_rule_unique_id_list.append(unique_id) + self._all_tmp_timeline_op_rule[unique_id] = tmp_rule + logger.debug("The rule of version %s is successfully generated.", unique_id) + + def generate_specified_list_timeline_op_rule(self, specified_unique_id_list, kid_id_list=None): + for specified_unique_id in specified_unique_id_list: + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + self.generate_specified_timeline_op_rule(specified_unique_id, kid_id_list) + + def generate_specified_timeline_op_rule(self, specified_unique_id, kid_id_list=None): + """用于实现生成特定版本规则 + + 若不存在相应specified_unique_id的规则、或是已生成、循环继承等情况,将该规则置空并返回 + 规则库文件结构设置为多叉树, 结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成, + 直到specified_unique_id规则依赖继承的规则库全部生成完毕, 再生成该指定规则库, 将specified_unique_id的规则库归档 + + 参数: + specified_unique_id: 指定版本规则id + kid_id_list: 子规则id数组, 用于防止循环继承, 如间接继承自身或直接继承自身等情况 + 返回: + None + """ + if kid_id_list is None: + kid_id_list = [] + + # 若该unique_id规则在timeline_fusion_ops.yaml中没有相应的规则, 生成该id规则,置为空 + if self._all_origin_timeline_op_rule_dict.get(specified_unique_id) is None: + logger.warning("The 
specified version %s does not exist in the rule library. " + "Ensure that the corresponding rule is configured in the YAML file. " + "The version %s is left blank.", + specified_unique_id, + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + # 若该unique_id规则已经生成,则无需再次生成 + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + logger.warning("The rule has been generated and does not need to be generated again. " + "Check whether unique id %s in the YAML file is duplicate.", + specified_unique_id) + return + + # 若kid_id_list不为空,且间接继承自身,则尝试生成空规则用于继承 + if kid_id_list and self._is_duplicated_element_in_lists(specified_unique_id, kid_id_list): + logger.warning("It cannot be inherited indirectly. Ensure that the corresponding rules are correctly " + "configured in the YAML file and leave Version %s blank.", + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + rule_dic = self._all_origin_timeline_op_rule_dict.get(specified_unique_id) + if rule_dic is not None: + kid_id_list.append(specified_unique_id) + + global_inherit_id = rule_dic.get("inherit_unique_id") + if global_inherit_id and global_inherit_id not in self._exist_timeline_op_rule_unique_id_list: + logger.debug("The rule of version %s global inherit the rule of version %s", + specified_unique_id, global_inherit_id) + self.generate_specified_timeline_op_rule(global_inherit_id, kid_id_list) + + # 若局部继承的规则未生成, 生成该规则 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + logger.debug("The rule of version %s local inherit the rule of version %s", + specified_unique_id, local_inherit_id_list) + self.generate_specified_list_timeline_op_rule(specified_unique_id_list=local_inherit_id_list, + kid_id_list=kid_id_list) + logger.debug("Start to generate rule of version %s", specified_unique_id) + # 实现全局继承与局部继承 + temp_rule = OpRule(timeline_op_rule_handler=self, + 
rule=self._all_tmp_timeline_op_rule.get(global_inherit_id)) + temp_rule.merge(rule_dic.get("operator_rules")) + # 将生成的规则归档保存 + self.add_new_timeline_op_rule(specified_unique_id, temp_rule.tmp_rule) + return + logger.error("Failed to generate the rule whose unique_id is %s. Ensure that the rule is configured in " + "the YAML file and the version %s is empty.", specified_unique_id, specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + + def generate_all_timeline_op_rule(self): + """用于实现获取所有版本规则 + + 查找db_content中的规则库, 规则库文件结构设置为多叉树, 优先生成无继承的基础规则版本 + 循环并生成其他版本, 文件结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成,直到全部规则库生成后退出函数 + + 参数: + None + 返回: + None + """ + self.generate_basic_timeline_op_rules() + _unique_id_list = copy.deepcopy(list(self._all_origin_timeline_op_rule_dict.keys())) + for unique_id in _unique_id_list: + if unique_id in self._exist_timeline_op_rule_unique_id_list: + continue + self.generate_specified_timeline_op_rule(unique_id) + + def get_tmp_timeline_op_rule_with_unique_id(self, unique_id): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + logger.error("The specified unique_id does not exist in the rule library. Ensure that the " + "corresponding rule is configured in the YAML file and the version %s is empty." + "If the value of unique_id is a negative number, the version may not be supported.", + unique_id) + self.add_empty_timeline_op_rule(unique_id) + if unique_id < 0: + logger.error("Advise to use a positive integer as the unique id of rules. " + "Negative numbers: %s are not recommended to use as unique id. 
" + "If specified invalid unique id: %s is used, an empty rule is returned by default.", + unique_id, constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + return self._all_tmp_timeline_op_rule.get(unique_id) diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini index b8f6703685..7185a6b3bf 100644 --- a/profiler/advisor/config/config.ini +++ b/profiler/advisor/config/config.ini @@ -9,8 +9,8 @@ tune_ops_file = operator_tuning_file.cfg [THRESHOLD] # operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker operator_bound_ratio = 0.8 -[RULE] +[RULE-BUCKET] # region : URL of different regions where can download rule yaml file -cn-north-9 = https://cnnorth9-modelarts-sdk.obs.cn-north-9.myhuaweicloud.com/modelarts/solution/ma_advisor_rules/ -cn-southwest-2 = https://cnsouthwest2-modelarts-sdk.obs.cn-southwest-2.myhuaweicloud.com/modelarts/solution/ma_advisor_rules/ -cn-north-7 = https://cnnorth7-modelarts-sdk.obs.cn-north-7.ulanqab.huawei.com/modelarts/solution/ma_advisor_rules/ \ No newline at end of file +cn-north-9 = cnnorth9-modelarts-sdk +cn-southwest-2 = cnsouthwest2-modelarts-sdk +cn-north-7 = cnnorth7-modelarts-sdk \ No newline at end of file diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index ee8b3563b7..a2d91377cc 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -2,6 +2,7 @@ import logging import os +from profiler.advisor.dataset.dataset import Dataset from profiler.advisor.utils.utils import singleton from profiler.cluster_analyse.common_func.file_manager import FileManager from profiler.advisor.common import constant as const @@ -13,10 +14,10 @@ from profiler.advisor.dataset.cluster.cluster_step_trace_time_bean import Cluste logger = logging.getLogger() -class ClusterDataset: +class ClusterDataset(Dataset): - def 
__init__(self, collection_path, **kwargs) -> None: - self.collection_path = os.path.realpath(collection_path) + def __init__(self, collection_path, data: dict, **kwargs) -> None: + super().__init__(collection_path, data) if not self.is_cluster_analysis_output_exist(): self.cluster_analyze() @@ -64,14 +65,14 @@ class ClusterDataset: class ClusterStepTraceTimeDataSet(ClusterDataset): RANK = "rank" - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) + def __init__(self, collection_path: str, data: dict, kwargs: dict = None): self._step_dict = defaultdict() - self.parse() + super().__init__(collection_path, data) - def parse(self): + def _parse(self): step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) self._step_dict = self.formate_data(step_data) + return True def formate_data(self, step_data: list): step_dict = defaultdict(lambda: [0, 0, 0]) @@ -100,15 +101,14 @@ class ClusterCommunicationDataSet(ClusterDataset): SDMA = "SDMA" RDMA = "RDMA" - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) + def __init__(self, collection_path: str, data: dict, kwargs: dict = None): self.rank_bw_dict = defaultdict(lambda: { self.RDMA_TIME_MS: 0, self.RDMA_SIZE_MB: 0, self.SDMA_TIME_MS: 0, self.SDMA_SIZE_MB: 0, }) - self.parse() + super().__init__(collection_path, data) @staticmethod def compute_ratio(dividend: float, divisor: float): @@ -117,7 +117,7 @@ class ClusterCommunicationDataSet(ClusterDataset): else: return round(dividend / divisor, 4) - def parse(self): + def _parse(self): communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) self.process(communication_json) diff --git a/profiler/advisor/dataset/dataset.py b/profiler/advisor/dataset/dataset.py new file mode 100644 index 0000000000..7f1e40a38b --- /dev/null +++ b/profiler/advisor/dataset/dataset.py @@ -0,0 +1,38 @@ +""" +dataset module +""" +import logging +import os + +from 
profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class Dataset: + """ + :param collection_path: dataSet absolute path + dataset base class + """ + + def __init__(self, collection_path, data=None) -> None: + if data is None: + data = {} + self.collection_path = os.path.abspath(os.path.join(Config().work_path, collection_path)) + logger.debug("init %s with %s", self.__class__.__name__, self.collection_path) + if self._parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) + + def _parse(self): + return None + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__name__.rsplit('.', maxsplit=1)[-1] diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index c1134a9784..9b4c102dff 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ b/profiler/advisor/dataset/timeline_event_dataset.py @@ -2,6 +2,7 @@ import logging from typing import List import ijson +from profiler.advisor.dataset.dataset import Dataset from tqdm import tqdm from profiler.advisor.common import constant as const @@ -13,17 +14,17 @@ logger = logging.getLogger() @singleton -class TimelineEventDataset: +class TimelineEventDataset(Dataset): - def __init__(self, root_dir, **kwargs) -> None: + def __init__(self, collection_path, data: dict, **kwargs) -> None: self._ops_with_task_type = {} self._ops_with_stack = {} self._torch_to_npu = {} self._acl_to_npu = set() self._aten: List[str] = [] self._optimizer: List[str] = [] - self.timeline_dir = root_dir - self.timeline_data_list = get_file_path_from_directory(root_dir, lambda file: file.endswith("trace_view.json")) + self.timeline_dir = collection_path + self.timeline_data_list = get_file_path_from_directory(collection_path, lambda file: file.endswith("trace_view.json")) self.dataset_len = None self.analysis_mode = kwargs.get("analysis_mode") self.task_type = 
kwargs.get("task_type") @@ -34,13 +35,14 @@ class TimelineEventDataset: logger.info("Load fusion operators database for cann version '%s' and torch version '%s'", self.cann_version, self.torch_version) - self.parse() + super().__init__(collection_path, data) if self.analysis_mode in ["op_stack", "all"]: self._task_op_names = list(set([event_key.split("-")[0] for event_key in self._ops_with_task_type.keys()])) self._post_process() + @property def ops_with_stack(self): return self._ops_with_stack @@ -69,23 +71,15 @@ class TimelineEventDataset: def aten(self): return self._aten - @classmethod - def get_key(cls): - """ - get key of dataset - :return: key - """ - return cls.__module__.rsplit('.', maxsplit=1)[-1] - - def parse(self): + def _parse(self): if len(self.timeline_data_list) == 0: logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) return False if len(self.timeline_data_list) > 1: - logger.warning("Please ensure only one trace_view.json in %s, skip timeline analysis.", self.timeline_dir) - return False + logger.warning("Please ensure only one trace_view.json in %s, there will analysis first timeline profiling data.", self.timeline_dir) + self.timeline_data_list = [self.timeline_data_list[0]] result = self.parse_data_with_generator(self._add_event) diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 19da350a02..156922f4d1 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,47 +1,44 @@ import os -from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer -from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer -from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.utils.utils import Timer 
-from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer class Interface: - supported_analysiser = { - "computing": [], - "scheduling": [TimelineFusionOpsAnalyzer], + supported_analyzer = { + "schedule": [TimelineFusionOpsAnalyzer], + "computation": [], "communication": [], "overall": [], "dataloader": [], "cluster": [SlowRankAnalyzer, SlowLinkAnalyzer] } - all_dimension = supported_analysiser.keys() + all_dimension = supported_analyzer.keys() def __init__(self, **kwargs): self.collection_path = os.path.realpath(kwargs.get("profiling_path")) @staticmethod - def get_analyzer(dimension, is_inference=False): - return Interface.supported_analysiser.get(dimension, []) + def get_analyzer(dimension): + return Interface.supported_analyzer.get(dimension, []) def get_result(self: any, dimension: str, render_html=False, **kwargs): """ :Param mode: affinity apis, ai cpu and so on. 
""" result_list = [] - analysiser_list = self.get_analyzer(dimension, kwargs.get("is_inference", False)) - for idx, clss in enumerate(analysiser_list): + analyzer_list = self.get_analyzer(dimension) + for idx, clss in enumerate(analyzer_list): if clss and callable(clss): - analysiser = clss(collection_path = self.collection_path, **kwargs) - result_list.append(analysiser.optimize()) - if render_html and idx == len(analysiser_list) - 1: - if hasattr(analysiser, "html_render"): - analysiser.html_render.render_html() - analysiser.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + analyzer = clss(collection_path = self.collection_path, **kwargs) + result_list.append(analyzer.optimize(**kwargs)) + if render_html and idx == len(analyzer_list) - 1: + if hasattr(analyzer, "html_render"): + analyzer.html_render.render_html() + analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') return result_list diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index d7837e1e40..1a4444f1ec 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -1,499 +1,549 @@ -import json -import logging -import multiprocessing as mp -import os -import queue -import re -import stat -import time -import traceback -import types -from functools import wraps -from typing import Any, Set - -import click -import requests -from requests.adapters import HTTPAdapter -from tqdm import tqdm - -from profiler.advisor.common import constant as const -from profiler.advisor.common.timeline.fusion_ops_db import FusionOperatorDB -from profiler.advisor.common.version_control import VersionControl -from profiler.advisor.utils.log import init_logger, get_log_level - -logger = logging.getLogger() -logger.setLevel(get_log_level()) -permission_warned: Set = set() - - -def ignore_warning(exception: Exception = None): - return exception - - -class ContextObject(object): - def __init__(self): - self._debug = False - - def 
set_debug(self, debug=False): - self._debug = debug - - @property - def debug_mode(self): - return self._debug - - -def debug_option(f): - return click.option('--debug', '-D', - is_flag=True, - expose_value=False, - is_eager=True, - callback=init_logger, - help="Debug Mode. Shows full stack trace when error occurs.")(f) - - -def singleton(cls): - """ - :param cls: any class - :return: singleton handle - """ - _instance = {} - - def _singleton(*args: any, **kw: any) -> any: - if cls not in _instance: - _instance[cls] = cls(*args, **kw) - return _instance.get(cls) - - return _singleton - - -def lazy_property(func): - """ - Lazy loading of class attributes. - which is calculated only once when it is called for the first time, - and will not be repeated for each call after that. - """ - attr_name = "_lazy_" + func.__name__ - - @property - def _lazy_property(instance): - if not hasattr(instance, attr_name): - setattr(instance, attr_name, func(instance)) - return getattr(instance, attr_name) - - return _lazy_property - - -class CheckPathAccess: - """ - check path access permissions - """ - - # pylint: disable=no-member - def __init__(self, func): - wraps(func)(self) - self.warned = permission_warned - - def __call__(self, *args, **kwargs): - path = args[0] - if not os.access(path, os.R_OK) and path not in self.warned: - logger.warning("%s can not read, check the permissions", path) - self.warned.add(path) - return self.__wrapped__(*args, **kwargs) - - def __get__(self, instance, cls): - if instance is None: - return self - return types.MethodType(self, instance) - - -def walk_error_handler(error): - """ - handle dir walk error - """ - if error.filename not in permission_warned: - logger.warning(error) - permission_warned.add(error.filename) - - -@CheckPathAccess -def get_file_path_from_directory(path: str, check_func: Any) -> list: - """ - get file from directory - """ - file_list = [] - for root, _, files in os.walk(path, onerror=walk_error_handler): - for filename in 
files: - filepath = os.path.join(root, filename) - if check_func(filename): - file_list.append(filepath) - return file_list - - -@singleton -class Timer: - def __init__(self): - self.strftime = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) - - -def get_analyze_processes(): - # n_processes not exposed to user through ma-advisor command arguments now - return min(int(os.getenv(const.MA_ADVISOR_ANALYZE_PROCESSES, 1)), const.MA_ADVISOR_MAX_PROCESSES) - - -def init_timeline_ops_db(cann_version=None, torch_version=None): - logger.debug("init operators database") - - return FusionOperatorDB(cann_version=cann_version, torch_version=torch_version) - - -def format_timeline_result(result: dict, dump_html=False): - """ - :Param result: json for api name and stack - :Return: json after format - """ - format_result = {} - if dump_html: - result = json.loads(json.dumps(result).replace("\\r\\n", "
").replace("", "<module>")) - - for key, stacks in result.items(): - api_name = key.split(":")[0] - format_result[api_name] = sorted(list(stacks.items()), key=lambda stack: stack[1], reverse=True) - return format_result - - -class ParallelJob: - - def __init__(self, src_func, ops_api_list, job_name=None): - if not callable(src_func): - raise TypeError(f"src_func should be callable") - - if not isinstance(ops_api_list, (list, tuple)): - raise TypeError(f"ops_api_list should be list or tuple") - - self.src_func = src_func - self.ops_api_list = ops_api_list - self.job_name = job_name - - def start(self, n_proccesses): - - queue = mp.Queue(len(self.ops_api_list)) - completed_queue = mp.Queue() - for i in range(len(self.ops_api_list)): - queue.put(i) - - processes = [] - listen = mp.Process(target=self.listener, args=(completed_queue, len(self.ops_api_list),)) - listen.start() - - for i in range(n_proccesses): - p = mp.Process(target=self.parallel_queue, args=(queue, completed_queue,)) - processes.append(p) - p.start() - - for p in processes: - p.join() - - completed_queue.put(None) - listen.join() - - def listener(self, completed_queue, num): - pbar = tqdm(total=num, position=0, leave=False, ncols=100, desc=self.job_name) - for _ in iter(completed_queue.get, None): - pbar.update() - pbar.refresh() - pbar.n = num - - def parallel_queue(self, job_queue, completed_queue): - while True: - try: - if job_queue.empty(): - break - token = job_queue.get(timeout=1) - except queue.Empty: - continue - self.src_func(*self.ops_api_list[token]) - completed_queue.put(token) - - -def mp_queue_to_list(job_queue): - queue_list = [] - while True: - try: - if job_queue.empty(): - break - token = job_queue.get(timeout=1) - queue_list.append(token) - except queue.Empty: - continue - return queue_list - - -def load_parameter(parameter, default): - if not os.environ.get(parameter, None): - return default - else: - return os.environ.get(parameter) - - -def get_supported_subclass(clazz: 
VersionControl.__class__, cann_version: str): - """ - Returns a list of subclasses that support the specified version - :param clazz: Class name which is extends to VersionControl.__class__ - :param cann_version: The CANN software version - :return: The list of subclasses that support the specified CANN version - """ - # 获取所有支持这个cann版本的子类 - dataset_classes = clazz.__subclasses__() - sub_class_list = [cls for cls in dataset_classes if cls.is_supported(cann_version)] - logger.debug("The support subclass list is %s, cann version is %s", str(sub_class_list), cann_version) - return sub_class_list - - -def to_percent(num: float) -> str: - """ - change float to percent format - """ - num = num * 100 - return f"{num:.2f}%" - - -def safe_division(numerator, denominator): - """Return 0 if denominator is 0.""" - return denominator and numerator / denominator - - -def safe_write(content, save_path): - if os.path.dirname(save_path) != "": - os.makedirs(os.path.dirname(save_path), exist_ok=True) - - with os.fdopen(os.open(save_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, - stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP), "w") as f: - f.write(content) - - -def create_directory_for_file(file: str) -> None: - """ - create directory for file - """ - dirname = os.path.dirname(file) - if not os.path.exists(dirname): - os.makedirs(dirname) - - -class CheckPathAccess: - """ - check path access permissions - """ - - # pylint: disable=no-member - def __init__(self, func): - wraps(func)(self) - self.warned = permission_warned - - def __call__(self, *args, **kwargs): - path = args[0] - if path and not os.access(path, os.R_OK) and path not in self.warned: - logger.warning("%s can not read, check the permissions", path) - self.warned.add(path) - return self.__wrapped__(*args, **kwargs) - - def __get__(self, instance, cls): - if instance is None: - return self - return types.MethodType(self, instance) - - -@CheckPathAccess -def get_file_path_from_directory(path, check_func): - """ - get file from 
directory - """ - file_list = [] - - if not path: - return file_list - - if not os.path.isdir(path): - logger.warning("Expected existed directory, but got %s", path) - - for root, _, files in os.walk(path): - for filename in files: - filepath = os.path.join(root, filename) - if check_func(filename): - file_list.append(filepath) - return file_list - - -@CheckPathAccess -def get_dir_path_from_directory(path: str, check_func: Any) -> list: - """ - get file from directory - """ - file_list = [] - for root, _, files in os.walk(path, onerror=walk_error_handler): - for filename in files: - filepath = os.path.join(root, filename) - if check_func(filename): - file_list.append(filepath) - return file_list - - -def is_regex_pattern(string: str): - """ - Check if str is a regular expression. - """ - escaped_string = re.escape(string) - return not (escaped_string == string) - - -def join_prof_path(root_dir: str, sub_dir: str) -> str: - """ - regular expression matching method for path concatenation - """ - if is_regex_pattern(sub_dir): - for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): - if re.match(sub_dir, os.path.basename(root)): - return root - else: - sub_dir = os.path.join(root_dir, sub_dir) - if os.path.exists(sub_dir): - return sub_dir - return "" - - -def format_excel_title(title: str) -> str: - """ - format excel title - """ - title = title.lower() - title = title.replace("(us)", '') - title = title.replace("(ns)", '') - title = title.replace("(%)", '') - title = title.replace(" ", "_") - return title - - -def format_float(num: float) -> float: - """ - format float num, round to 2 decimal places - """ - return round(num, 2) - - -class SafeOpen: - """ - safe open to check file - """ - - # pylint: disable=consider-using-with - def __init__(self, name, mode='r', encoding=None): - self.file = None - if not os.path.exists(name): - logger.warning("%s not exist, please check", name) - return - - if os.access(name, os.R_OK): - self.file = open(name, mode, 
encoding=encoding, errors="ignore") - else: - logger.warning("%s can not read, check the permissions", name) - - def __enter__(self): - return self.file - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.file: - self.file.close() - return True - - -def save_downloaded_file(response, url_path, file_save_path): - """保存响应体中的文件 - - 参数: - response: 请求后获取的响应体 - url_path: url路径 - file_save_path: 保存路径 - 返回: - final_file_path: 文件保存绝对路径 - """ - # 获取url路径中的文件名, 拼接在保存路径下 - file_save_path = os.path.normpath(file_save_path) - file_name = os.path.basename(url_path) - final_file_path = os.path.join(file_save_path, file_name) - # 若目标保存路径不存在,则自动生成 - if not os.path.exists(file_save_path): - os.makedirs(file_save_path) - if response.status_code <= 300: - logger.debug("Response status code is %s", response.status_code) - flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL - modes = stat.S_IWUSR | stat.S_IRUSR - # 若文件已存在,则移除已有的文件并保存最新的文件 - if os.path.exists(final_file_path): - os.remove(final_file_path) - # 保存文件 - with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f: - f.write(response.content) - logger.info("Success to save content in: %s", os.path.abspath(final_file_path)) - else: - # 若响应码不为预期的数值, 显示相应告警 - logger.warning("Failed to save the response body. The response status code is %s. 
" - "Please check the network or file URL", response.status_code) - - -def request_with_retry(url_path): - """使用requests请求获取文件, 失败则进行重试, 最多请求 max_retries+1 次 - - 参数: - url_path: URL路径 - file_save_path: 云文件保存路径 - """ - logger.debug("Requesting or retrying to get %s", url_path) - - # 若从环境变量指定了保存路径,优先从环境变量中获取,若为空则使用默认的云文件保存路径constant.CLOUD_RULE_PATH - file_save_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH) - if os.getenv(const.ADVISOR_RULE_PATH): - file_save_path = os.getenv(const.ADVISOR_RULE_PATH) - - session = requests.Session() - # 使用session发起的所有请求, 默认最多会重试 max_retries 次, 计入最初请求, 最差情况下请求 max_retries+1 次 - adapter = HTTPAdapter(max_retries=const.MAX_RETRIES) - session.mount('http://', adapter) - session.mount('https://', adapter) - - logger.debug('Session try to get response') - response = None - try: - response = session.get(url_path, timeout=const.TIMEOUT) - except Exception as e: - logger.debug("Error: %s: %s", e, traceback.format_exc()) - - if response is None: - logger.warning("Fail to download: %s, response is None, " - "please use the environment variable %s for more detailed information", - url_path, const.ADVISOR_LOG_LEVEL) - else: - try: - # 若响应码为400~600之间,response.raise_for_status抛出HTTPError错误, 跳过调用save_downloaded_file函数逻辑 - response.raise_for_status() - save_downloaded_file(response, url_path=url_path, file_save_path=file_save_path) - except Exception as e: - logger.warning("Error: %s: %s", e, traceback.format_exc()) - # 关闭 session, 清除所有装配器 - session.close() - - -def read_csv(file): - import csv - - raw_data = [] - logger.debug("Parse file %s", file) - with SafeOpen(file, encoding="utf-8") as csv_file: - try: - csv_content = csv.reader(csv_file) - for row in csv_content: - raw_data.append(row) - except OSError as error: - logger.error("Read csv file failed : %s", error) - return [] - - return raw_data +import inspect +import json +import logging +import multiprocessing as mp +import os +import queue +import re +import stat +import 
time +import traceback +import types +from functools import wraps +from typing import Any, Set + +import click +import requests +from requests.adapters import HTTPAdapter +from tqdm import tqdm + +from profiler.advisor.common import constant as const +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.utils.log import init_logger, get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) +permission_warned: Set = set() + + +def ignore_warning(exception: Exception = None): + return exception + + +class ContextObject(object): + def __init__(self): + self._debug = False + + def set_debug(self, debug=False): + self._debug = debug + + @property + def debug_mode(self): + return self._debug + + +def debug_option(f): + return click.option('--debug', '-D', + is_flag=True, + expose_value=False, + is_eager=True, + callback=init_logger, + help="Debug Mode. Shows full stack trace when error occurs.")(f) + + +def get_class_absolute_path(cls): + module = inspect.getmodule(cls) + if module is not None: + module_path = module.__name__ + class_name = cls.__name__ + return f"{module_path}.{class_name}" + else: + return None + + +def is_static_func(function_obj): + return isinstance(function_obj, staticmethod) + + +def singleton(cls): + """ + :param cls: any class + :return: singleton handle + + When using the singleton function, you need to manually specify arg='dataSet_path'. Otherwise, the singleton function + is initialized by class name. + if cls has 'arg' property, _instance map will build by class_name and 'arg', the default value of + collection path is class absolute path. 
+ + _instance = {cls.name: {collection_path: instance}} + """ + _instance = {} + + def _singleton(*args: any, **kw: any) -> any: + collection_path = kw.get("collection_path") + if not collection_path: + collection_path = get_class_absolute_path(cls) + if cls in _instance and collection_path in _instance[cls]: + return _instance[cls].get(collection_path) + if cls not in _instance: + _instance[cls] = {collection_path: cls(*args, **kw)} + else: + _instance[cls][collection_path] = cls(*args, **kw) + return _instance[cls].get(collection_path) + + # 保留原始类的属性和方法 + _singleton.__name__ = cls.__name__ + _singleton.__module__ = cls.__module__ + _singleton.__doc__ = cls.__doc__ + + # 拷贝原始类的类方法和静态方法 + _singleton.__dict__.update(cls.__dict__) + for base_class in inspect.getmro(cls)[::-1]: + # 获取类的所有成员 + members = inspect.getmembers(base_class) + + # 过滤出函数对象 + function_objs = [member[1] for member in members if inspect.isfunction(member[1]) or inspect.ismethod(member[1])] + for function_obj in function_objs: + if inspect.isfunction(function_obj) and not is_static_func(function_obj): + continue + setattr(_singleton, function_obj.__name__, function_obj) + + return _singleton + + +def lazy_property(func): + """ + Lazy loading of class attributes. + which is calculated only once when it is called for the first time, + and will not be repeated for each call after that. 
+ """ + attr_name = "_lazy_" + func.__name__ + + @property + def _lazy_property(instance): + if not hasattr(instance, attr_name): + setattr(instance, attr_name, func(instance)) + return getattr(instance, attr_name) + + return _lazy_property + + +class CheckPathAccess: + """ + check path access permissions + """ + + # pylint: disable=no-member + def __init__(self, func): + wraps(func)(self) + self.warned = permission_warned + + def __call__(self, *args, **kwargs): + path = args[0] + if not os.access(path, os.R_OK) and path not in self.warned: + logger.warning("%s can not read, check the permissions", path) + self.warned.add(path) + return self.__wrapped__(*args, **kwargs) + + def __get__(self, instance, cls): + if instance is None: + return self + return types.MethodType(self, instance) + + +def walk_error_handler(error): + """ + handle dir walk error + """ + if error.filename not in permission_warned: + logger.warning(error) + permission_warned.add(error.filename) + + +@CheckPathAccess +def get_file_path_from_directory(path: str, check_func: Any) -> list: + """ + get file from directory + """ + file_list = [] + for root, _, files in os.walk(path, onerror=walk_error_handler): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +@singleton +class Timer: + def __init__(self): + self.strftime = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) + + +def get_analyze_processes(): + # n_processes not exposed to user through att-advisor command arguments now + return min(int(os.getenv(const.MA_ADVISOR_ANALYZE_PROCESSES, 1)), const.MA_ADVISOR_MAX_PROCESSES) + + +def format_timeline_result(result: dict, dump_html=False): + """ + :Param result: json for api name and stack + :Return: json after format + """ + format_result = {} + if dump_html: + result = json.loads(json.dumps(result).replace("\\r\\n", "
").replace("", "<module>")) + + for key, stacks in result.items(): + api_name = key.split(":")[0] + format_result[api_name] = sorted(list(stacks.items()), key=lambda stack: stack[1], reverse=True) + return format_result + + +class ParallelJob: + + def __init__(self, src_func, ops_api_list, job_name=None): + if not callable(src_func): + raise TypeError(f"src_func should be callable") + + if not isinstance(ops_api_list, (list, tuple)): + raise TypeError(f"ops_api_list should be list or tuple") + + self.src_func = src_func + self.ops_api_list = ops_api_list + self.job_name = job_name + + def start(self, n_proccesses): + + job_queue = mp.Queue(len(self.ops_api_list)) + completed_queue = mp.Queue() + for i in range(len(self.ops_api_list)): + job_queue.put(i) + + processes = [] + listen = mp.Process(target=self.listener, args=(completed_queue, len(self.ops_api_list),)) + listen.start() + + for i in range(n_proccesses): + p = mp.Process(target=self.parallel_queue, args=(job_queue, completed_queue,)) + processes.append(p) + p.start() + + for p in processes: + p.join() + + completed_queue.put(None) + listen.join() + + def listener(self, completed_queue, num): + pbar = tqdm(total=num, position=0, leave=False, ncols=100, desc=self.job_name) + for _ in iter(completed_queue.get, None): + pbar.update() + pbar.refresh() + pbar.n = num + + def parallel_queue(self, job_queue, completed_queue): + while True: + try: + if job_queue.empty(): + break + token = job_queue.get(timeout=1) + except queue.Empty: + continue + self.src_func(*self.ops_api_list[token]) + completed_queue.put(token) + + +def mp_queue_to_list(job_queue): + queue_list = [] + while True: + try: + if job_queue.empty(): + break + token = job_queue.get(timeout=1) + queue_list.append(token) + except queue.Empty: + continue + return queue_list + + +def load_parameter(parameter, default): + if not os.environ.get(parameter, None): + return default + else: + return os.environ.get(parameter) + + +def 
get_supported_subclass(clazz: VersionControl.__class__, cann_version: str): + """ + Returns a list of subclasses that support the specified version + :param clazz: Class name which is extends to VersionControl.__class__ + :param cann_version: The CANN software version + :return: The list of subclasses that support the specified CANN version + """ + # 获取所有支持这个cann版本的子类 + dataset_classes = clazz.__subclasses__() + sub_class_list = [cls for cls in dataset_classes if cls.is_supported(cann_version)] + logger.debug("The support subclass list is %s, cann version is %s", str(sub_class_list), cann_version) + return sub_class_list + + +def to_percent(num: float) -> str: + """ + change float to percent format + """ + num = num * 100 + return f"{num:.2f}%" + + +def safe_division(numerator, denominator): + """Return 0 if denominator is 0.""" + return denominator and numerator / denominator + + +def safe_write(content, save_path): + if os.path.dirname(save_path) != "": + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + with os.fdopen(os.open(save_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP), "w") as f: + f.write(content) + + +def create_directory_for_file(file: str) -> None: + """ + create directory for file + """ + dirname = os.path.dirname(file) + if not os.path.exists(dirname): + os.makedirs(dirname) + + +class CheckPathAccess: + """ + check path access permissions + """ + + # pylint: disable=no-member + def __init__(self, func): + wraps(func)(self) + self.warned = permission_warned + + def __call__(self, *args, **kwargs): + path = args[0] + if path and not os.access(path, os.R_OK) and path not in self.warned: + logger.warning("%s can not read, check the permissions", path) + self.warned.add(path) + return self.__wrapped__(*args, **kwargs) + + def __get__(self, instance, cls): + if instance is None: + return self + return types.MethodType(self, instance) + + +@CheckPathAccess +def get_file_path_from_directory(path, 
check_func): + """ + get file from directory + """ + file_list = [] + + if not path: + return file_list + + if not os.path.isdir(path): + logger.warning("Expected existed directory, but got %s", path) + + for root, _, files in os.walk(path): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +@CheckPathAccess +def get_dir_path_from_directory(path: str, check_func: Any) -> list: + """ + get file from directory + """ + file_list = [] + for root, _, files in os.walk(path, onerror=walk_error_handler): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +def is_regex_pattern(string: str): + """ + Check if str is a regular expression. + """ + escaped_string = re.escape(string) + return not (escaped_string == string) + + +def join_prof_path(root_dir: str, sub_dir: str) -> str: + """ + regular expression matching method for path concatenation + """ + if is_regex_pattern(sub_dir): + for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): + if re.match(sub_dir, os.path.basename(root)): + return root + else: + sub_dir = os.path.join(root_dir, sub_dir) + if os.path.exists(sub_dir): + return sub_dir + return "" + + +def format_excel_title(title: str) -> str: + """ + format excel title + """ + title = title.lower() + title = title.replace("(us)", '') + title = title.replace("(ns)", '') + title = title.replace("(%)", '') + title = title.replace(" ", "_") + return title + + +def format_float(num: float) -> float: + """ + format float num, round to 2 decimal places + """ + return round(num, 2) + + +class SafeOpen: + """ + safe open to check file + """ + + # pylint: disable=consider-using-with + def __init__(self, name, mode='r', encoding=None): + self.file = None + if not os.path.exists(name): + logger.warning("%s not exist, please check", name) + return + + if os.access(name, os.R_OK): 
+ self.file = open(name, mode, encoding=encoding, errors="ignore") + else: + logger.warning("%s can not read, check the permissions", name) + + def __enter__(self): + return self.file + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file: + self.file.close() + return True + + +def save_downloaded_file(response, url_path, file_save_path): + """保存响应体中的文件 + + 参数: + response: 请求后获取的响应体 + url_path: url路径 + file_save_path: 保存路径 + 返回: + final_file_path: 文件保存绝对路径 + """ + # 获取url路径中的文件名, 拼接在保存路径下 + file_save_path = os.path.normpath(file_save_path) + file_name = os.path.basename(url_path) + final_file_path = os.path.join(file_save_path, file_name) + # 若目标保存路径不存在,则自动生成 + if not os.path.exists(file_save_path): + os.makedirs(file_save_path) + if response.status_code <= 300: + logger.debug("Response status code is %s", response.status_code) + flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL + modes = stat.S_IWUSR | stat.S_IRUSR + # 若文件已存在,则移除已有的文件并保存最新的文件 + if os.path.exists(final_file_path): + os.remove(final_file_path) + # 保存文件 + with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f: + f.write(response.content) + logger.info("Success to save content in: %s", os.path.abspath(final_file_path)) + else: + # 若响应码不为预期的数值, 显示相应告警 + logger.warning("Failed to save the response body. The response status code is %s. 
" + "Please check the network or try another region", response.status_code) + + +def request_with_retry(url_path, region_name=None): + """使用requests请求获取文件, 失败则进行重试, 最多请求 max_retries+1 次 + + 参数: + url_path: URL路径 + file_save_path: 云文件保存路径 + """ + logger.debug("Requesting or retrying to get file from region: %s", region_name) + + # 若从环境变量指定了保存路径,优先从环境变量中获取,若为空则使用默认的云文件保存路径constant.CLOUD_RULE_PATH + file_save_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH) + if os.getenv(const.ADVISOR_RULE_PATH): + file_save_path = os.getenv(const.ADVISOR_RULE_PATH) + + session = requests.Session() + # 使用session发起的所有请求, 默认最多会重试 max_retries 次, 计入最初请求, 最差情况下请求 max_retries+1 次 + adapter = HTTPAdapter(max_retries=const.MAX_RETRIES) + session.mount(const.HTTP_PREFIXES, adapter) + session.mount(const.HTTPS_PREFIXES, adapter) + + logger.debug('Session try to get response') + response = None + try: + response = session.get(url_path, timeout=const.TIMEOUT) + except Exception as e: + logger.debug("Error: %s: %s", e, traceback.format_exc()) + + if response is None: + logger.warning("Fail to download file from region: %s, response is None, " + "please use the environment variable %s for more detailed information", + region_name, const.ADVISOR_LOG_LEVEL) + else: + try: + # 若响应码为400~600之间,response.raise_for_status抛出HTTPError错误, 跳过调用save_downloaded_file函数逻辑 + response.raise_for_status() + save_downloaded_file(response, url_path=url_path, file_save_path=file_save_path) + except Exception as e: + logger.warning("Error: %s: %s", e, traceback.format_exc()) + # 关闭 session, 清除所有装配器 + session.close() + + +def read_csv(file): + import csv + + raw_data = [] + logger.debug("Parse file %s", file) + with SafeOpen(file, encoding="utf-8") as csv_file: + try: + csv_content = csv.reader(csv_file) + for row in csv_content: + raw_data.append(row) + except OSError as error: + logger.error("Read csv file failed : %s", error) + return [] + + return raw_data + + +def get_file_path_by_walk(root, 
filename): + file_path = "" + for root, _, files in os.walk(root, topdown=True): + for name in files: + if name == filename: + file_path = os.path.join(root, name) + return file_path + return file_path diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 2efecffcb7..95d8fa2f53 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -19,8 +19,8 @@ def _analyze(dimensions, **kwargs): job_list = [] for dimension in dimensions: - interface = Interface(**kwargs) - job_list.append((dimension, interface)) + interface = Interface(**kwargs) + job_list.append((dimension, interface)) for i, (dimension, interface) in enumerate(job_list[::-1]): result_list += interface.get_result(dimension, render_html=i == len(job_list) - 1) @@ -41,8 +41,9 @@ def analyze_cli(**kwargs): name="all", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path(), + help='Directory of benchmark profiling data, used for compare performance') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -52,7 +53,14 @@ def analyze_cli(**kwargs): type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--is_inference', is_flag=True) +# @click.option('--is_inference', is_flag=True, help="Enable performance analysis of inference task") +@click.option("-pt", + "--profiling_type", + metavar="", + 
default=constant.ASCEND_PYTORCH_PROFILER, + required=False, + type=click.Choice(constant.SUPPORTED_PROFILING_TYPE), + help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_all(**kwargs) -> None: # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 @@ -66,8 +74,7 @@ def analyze_all(**kwargs) -> None: name="communication", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -77,19 +84,16 @@ def analyze_all(**kwargs) -> None: type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--mode', '-m', 'mode', default=None) -@click.option('--is_inference', is_flag=True) @debug_option def analyze_communication(**kwargs) -> None: _analyze(["communication"], **kwargs) @analyze_cli.command(context_settings=CONTEXT_SETTINGS, - name="scheduling", + name="schedule", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -99,19 +103,16 @@ def 
analyze_communication(**kwargs) -> None: type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--mode', '-m', 'mode', default=None) -@click.option('--is_inference', is_flag=True) @debug_option -def analyze_scheduling(**kwargs) -> None: - _analyze(["scheduling"], **kwargs) +def analyze_schedule(**kwargs) -> None: + _analyze(["schedule"], **kwargs) @analyze_cli.command(context_settings=CONTEXT_SETTINGS, - name="computing", + name="computation", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -121,8 +122,33 @@ def analyze_scheduling(**kwargs) -> None: type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--mode', '-m', 'mode', default=None) -@click.option('--is_inference', is_flag=True) +@click.option("-pt", + "--profiling_type", + metavar="", + default=constant.ASCEND_PYTORCH_PROFILER, + required=False, + type=click.Choice(constant.SUPPORTED_PROFILING_TYPE), + help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option -def analyze_computing(**kwargs) -> None: - _analyze(["computing"], **kwargs) +def analyze_computation(**kwargs) -> None: + _analyze(["computation"], **kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + 
name="dataloader", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='Directory of profiling data') +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option('--is_inference', is_flag=True, help="Enable performance analysis of inference task") +@debug_option +def analyze_dataloader(**kwargs) -> None: + _analyze(["dataloader"], **kwargs) diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index b14d3dfd86..d9b5b10da7 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -53,7 +53,7 @@ advisor_cli.add_command(compare_cli, name="compare") if __name__ == '__main__': advisor_cli.main( - ["analyze", "scheduling", "-d", + ["analyze", "schedule", "-d", r"/home/ma-user/work/profiling", ] ) diff --git a/profiler/cli/update_cli.py b/profiler/cli/update_cli.py new file mode 100644 index 0000000000..9407981ae0 --- /dev/null +++ b/profiler/cli/update_cli.py @@ -0,0 +1,40 @@ +from urllib import parse + +import click + +from profiler.advisor.common import constant +from profiler.advisor.config.config import Config +from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup +from profiler.advisor.utils.utils import debug_option, request_with_retry + + +@click.group(name="update", cls=ClickAliasedGroup) +def update_cli(**kwargs): + """Update operation 
command, such as update rule and specify save path.""" + pass + + +@update_cli.command(context_settings=CONTEXT_SETTINGS, + name="rule", + short_help='Update the ma-advisor rules on the terminal. The default save path is ' + '"~/rules/cloud/". If user want to specify the save path, please use the environment ' + 'variable "ADVISOR_RULE_PATH"') +@click.option('--region', '-r', type=click.Choice(constant.CLOUD_RULE_REGION_LIST), required=True, + default=constant.DEFAULT_CLOUD_RULE_REGION, + help='Specifies the region where the rule file is downloaded.') +@debug_option +def update_rule(**kwargs) -> None: + """ + Download the latest rule yaml file. + """ + region_name = kwargs.get("region") + rule_bucket = Config().config.get(constant.RULE_BUCKET, region_name) + rule_endpoint_suffix = constant.COMMON_ENDPOINT_SUFFIX.format(region_name) + if region_name in constant.INNER_REGION_LIST: + rule_endpoint_suffix = constant.INNER_ENDPOINT_SUFFIX.format(region_name) + + obs_url = constant.HTTPS_PREFIXES + rule_bucket + "." 
+ rule_endpoint_suffix + obs_url = parse.urljoin(obs_url, constant.COMMON_YAML_DIR) + for file_name in constant.CLOUD_YAML_NAME_LIST: + url = parse.urljoin(obs_url, file_name) + request_with_retry(url, region_name) diff --git a/requirements/test.txt b/requirements/tests.txt similarity index 95% rename from requirements/test.txt rename to requirements/tests.txt index 3bacb7ca55..bab89704aa 100644 --- a/requirements/test.txt +++ b/requirements/tests.txt @@ -1,5 +1,5 @@ -pytest==6.2.4 -pytest-cov==2.12.0 -pytest-mock==3.6.1 -pytest-cookies==0.6.1 +pytest==6.2.4 +pytest-cov==2.12.0 +pytest-mock==3.6.1 +pytest-cookies==0.6.1 mock==4.0.3 \ No newline at end of file diff --git a/version.txt b/version.txt index 7bcd0e3612..9f8e9b69a3 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.0.2 \ No newline at end of file +1.0 \ No newline at end of file -- Gitee From 1761de9838e078f50ff993c34fcfb9c39f7d02db Mon Sep 17 00:00:00 2001 From: PersonalC Date: Wed, 8 May 2024 19:35:20 +0800 Subject: [PATCH 09/21] =?UTF-8?q?att=20advisor=E6=96=B0=E5=A2=9Edataset?= =?UTF-8?q?=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- profiler/advisor/analyzer/base_analyzer.py | 73 +- .../analyzer/cluster/slow_link_analyser.py | 10 +- .../analyzer/cluster/slow_rank_analyser.py | 10 +- .../{computing => computation}/__init__.py | 0 .../aicpu/__init__.py | 0 .../bound/__init__.py | 0 .../op_compile/__init__.py | 0 .../analyzer/overall/overall_analyzer.py | 1 + .../{scheduling => schedule}/__init__.py | 0 .../free_event/__init__.py | 0 .../fusion_ops/__init__.py | 0 .../fusion_ops/fusion_ops_analyzer.py | 542 ++++----- profiler/advisor/common/constant.py | 12 +- .../advisor/common/timeline/fusion_ops_db.py | 340 +----- .../common/timeline/fusion_ops_rule.py | 110 ++ .../timeline/fusion_ops_rule_handler.py | 193 +++ profiler/advisor/config/config.ini | 8 +- .../dataset/cluster/cluster_dataset.py | 23 +- 
profiler/advisor/dataset/dataset.py | 38 + .../advisor/dataset/timeline_event_dataset.py | 26 +- profiler/advisor/interface/interface.py | 33 +- profiler/advisor/utils/utils.py | 1048 +++++++++-------- profiler/cli/analyze_cli.py | 72 +- profiler/cli/entrance.py | 2 +- profiler/cli/update_cli.py | 40 + requirements/{test.txt => tests.txt} | 8 +- version.txt | 2 +- 27 files changed, 1414 insertions(+), 1177 deletions(-) rename profiler/advisor/analyzer/{computing => computation}/__init__.py (100%) rename profiler/advisor/analyzer/{computing => computation}/aicpu/__init__.py (100%) rename profiler/advisor/analyzer/{computing => computation}/bound/__init__.py (100%) rename profiler/advisor/analyzer/{computing => computation}/op_compile/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/free_event/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/fusion_ops/__init__.py (100%) rename profiler/advisor/analyzer/{scheduling => schedule}/fusion_ops/fusion_ops_analyzer.py (91%) create mode 100644 profiler/advisor/common/timeline/fusion_ops_rule.py create mode 100644 profiler/advisor/common/timeline/fusion_ops_rule_handler.py create mode 100644 profiler/advisor/dataset/dataset.py create mode 100644 profiler/cli/update_cli.py rename requirements/{test.txt => tests.txt} (95%) diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index ff945da5cf..6f9438ba75 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -1,17 +1,60 @@ +import logging +from functools import wraps +from typing import Dict, List, Union from abc import abstractmethod, ABCMeta + +from profiler.advisor.common import constant +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.dataset.dataset import Dataset +from profiler.advisor.result.result 
import OptimizeResult from profiler.advisor.display.html.render import HTMLRender -dataset_cls_list = [] +logger = logging.getLogger() + + +class BaseAnalyzer(VersionControl, metaclass=ABCMeta): + _SUPPORT_VERSIONS = constant.SUPPORTED_CANN_VERSION -class BaseAnalyzer(metaclass=ABCMeta): - def __init__(self, collection_path, dataset_cls_list, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): + dataset_cls_list = [] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): self.n_processes = n_processes self.cann_version = cann_version self.torch_version = torch_version self.html_render = HTMLRender() self.collection_path = collection_path self.kwargs = kwargs - self.event_dataset_list = self.get_dataset_dict(dataset_cls_list) + self.dataset_list: Dict[str, List[Dataset]] = {} + self.init_dataset_list() + self.result = OptimizeResult() + self.record_list: Dict[str, List] = {} + + @classmethod + def check_data(cls, data_list: tuple): + """ + check if all data in data list is contained + :param data_list: data list to check + :return: func ptr if check success + """ + + def decorate(func): + + @wraps(func) + def wrapper(self): + data = self.dataset_list + if data is None: + return None + for data_key in data_list: + if data_key not in data: + return None + + logger.info("Enable analysis %s with %s", self.__class__.__name__, ",".join(data_list)) + return func(self, data) + + return wrapper + + return decorate @abstractmethod def optimize(self): @@ -25,10 +68,24 @@ class BaseAnalyzer(metaclass=ABCMeta): def make_render(self): pass - def get_dataset_dict(self, dataset_cls_list): - datasets = {key: [] for key in dataset_cls_list} + def init_dataset_list(self)->None: + dataset_cls_list = self.dataset_cls_list + if len(dataset_cls_list) == 0: + logger.warning(f"Analyser: %s don't rely on any dataset!", self.__class__.__name__) + return for 
dataset_cls in dataset_cls_list: if dataset_cls and callable(dataset_cls): - datasets[dataset_cls] = dataset_cls(self.collection_path, **self.kwargs) - return datasets + dataset_cls(collection_path=self.collection_path, data=self.dataset_list, **self.kwargs) + + @staticmethod + def get_first_data_by_key(data, key) -> Union[Dataset, None]: + """ + get the first member from data with key + :param data: input data + :param key: data key + :return: the first dataset in dataset list + """ + if key in data and len(data[key]) > 0: + return data[key][0] + return None diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py index d4212ada2c..e9143ae1de 100644 --- a/profiler/advisor/analyzer/cluster/slow_link_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -14,7 +14,9 @@ # limitations under the License. from collections import defaultdict +from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataSet @@ -35,9 +37,11 @@ class SlowLinkAnalyzer(BaseAnalyzer): SLOW_LINK_ANALYSIS = "slow_link_analysis" dataset_cls_list = [ClusterCommunicationDataSet] - def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) - self.communication_data_class = self.event_dataset_list[ClusterCommunicationDataSet] + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, 
**kwargs) + key = ClusterCommunicationDataSet.get_key() + self.communication_data_class = self.get_first_data_by_key(self.dataset_list, key) self.rank_bw_dict = self.communication_data_class.get_data() self.result = OptimizeResult() self.bottelneck = '' diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py index 35b4663d38..b49ef5ec8c 100644 --- a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -14,7 +14,9 @@ # limitations under the License. from collections import defaultdict +from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant from profiler.advisor.result.result import OptimizeResult from profiler.advisor.result.item import OptimizeItem, OptimizeRecord from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataSet @@ -27,9 +29,11 @@ class SlowRankAnalyzer(BaseAnalyzer): BOTTLENECK_LIST = ['Computing', 'Communication', "Free"] dataset_cls_list = [ClusterStepTraceTimeDataSet] - def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) - self.step_trace_class = self.event_dataset_list[ClusterStepTraceTimeDataSet] + def __init__(self, collection_path, n_processes: int = 1, cann_version=constant.DEFAULT_CANN_VERSION, + torch_version=constant.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + key = ClusterStepTraceTimeDataSet.get_key() + self.step_trace_class = self.get_first_data_by_key(self.dataset_list, key) self.step_trace_dict = self.step_trace_class.get_data() self.result = OptimizeResult() self.bottelneck = '' diff --git a/profiler/advisor/analyzer/computing/__init__.py 
b/profiler/advisor/analyzer/computation/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/__init__.py rename to profiler/advisor/analyzer/computation/__init__.py diff --git a/profiler/advisor/analyzer/computing/aicpu/__init__.py b/profiler/advisor/analyzer/computation/aicpu/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/aicpu/__init__.py rename to profiler/advisor/analyzer/computation/aicpu/__init__.py diff --git a/profiler/advisor/analyzer/computing/bound/__init__.py b/profiler/advisor/analyzer/computation/bound/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/bound/__init__.py rename to profiler/advisor/analyzer/computation/bound/__init__.py diff --git a/profiler/advisor/analyzer/computing/op_compile/__init__.py b/profiler/advisor/analyzer/computation/op_compile/__init__.py similarity index 100% rename from profiler/advisor/analyzer/computing/op_compile/__init__.py rename to profiler/advisor/analyzer/computation/op_compile/__init__.py diff --git a/profiler/advisor/analyzer/overall/overall_analyzer.py b/profiler/advisor/analyzer/overall/overall_analyzer.py index 93b227fb61..e31a5d4288 100644 --- a/profiler/advisor/analyzer/overall/overall_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_analyzer.py @@ -1,4 +1,5 @@ import logging +from typing import Dict, List from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.display.html.render import HTMLRender diff --git a/profiler/advisor/analyzer/scheduling/__init__.py b/profiler/advisor/analyzer/schedule/__init__.py similarity index 100% rename from profiler/advisor/analyzer/scheduling/__init__.py rename to profiler/advisor/analyzer/schedule/__init__.py diff --git a/profiler/advisor/analyzer/scheduling/free_event/__init__.py b/profiler/advisor/analyzer/schedule/free_event/__init__.py similarity index 100% rename from profiler/advisor/analyzer/scheduling/free_event/__init__.py 
rename to profiler/advisor/analyzer/schedule/free_event/__init__.py diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py b/profiler/advisor/analyzer/schedule/fusion_ops/__init__.py similarity index 100% rename from profiler/advisor/analyzer/scheduling/fusion_ops/__init__.py rename to profiler/advisor/analyzer/schedule/fusion_ops/__init__.py diff --git a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py similarity index 91% rename from profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py rename to profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py index ca10dcb1f5..4259db093b 100644 --- a/profiler/advisor/analyzer/scheduling/fusion_ops/fusion_ops_analyzer.py +++ b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py @@ -1,271 +1,271 @@ -import multiprocessing -import logging -import re - -from tqdm import tqdm - -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant as const -from profiler.advisor.common.timeline.event import TimelineEvent -from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset -from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.result.item import OptimizeItem, OptimizeRecord -from profiler.advisor.utils.utils import format_timeline_result -from profiler.advisor.utils.utils import init_timeline_ops_db - -logger = logging.getLogger() - - -class TimelineFusionOpsAnalyzer(BaseAnalyzer): - dataset_cls_list = [TimelineEventDataset] - - def __init__(self, collection_path, n_processes: int = 1, cann_version=None, torch_version=None, **kwargs): - super().__init__(collection_path, self.dataset_cls_list, n_processes, cann_version, torch_version, **kwargs) - self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() - self.matched_op_stacks = {} - 
self.empty_stacks = True - self.result = OptimizeResult() - self.timeline_event_dataset = self.event_dataset_list[TimelineEventDataset] - - def optimize(self, **kwargs): - for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]: - - for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version), - f"_{mode}_op_api_map").items(), leave=False, ncols=100, - desc="Scanning timeline for affinity apis"): - for npu_api in npu_apis.split("/"): - self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode) - - self.query_stack(self.timeline_event_dataset) - - logger.info("Finish timeline analysis") - self.make_record() - self.make_render() - return self.result - - def find_fusion_ops(self, event_dataset: TimelineEventDataset, ops: str, npu_api: str, mode: str): - """ - :Param event_dataset: dataset of timeline event - :Param ops: operator combination with '-' as separator , e.g. permute-reshape - :Param npu_api: api of torch_npu, generally more efficient than torch api - :Param mode: aten or dequeue or optimizer - :Return: json of op_name and called times and detail stacks - """ - op_rule_pattern, enable_regex = self._format_rule_to_pattern(ops) - if not enable_regex: - self._match_ops(event_dataset, op_rule_pattern, npu_api, mode) - else: - try: - self._match_ops_with_regex(event_dataset, op_rule_pattern, npu_api, mode) - except Exception as e: - logger.warning("Failed to find fusion operators with regex %s, reason is %s", ops, e) - - def _match_ops(self, event_dataset: TimelineEventDataset, ops: str, npu_api: str, mode: str): - """ match operator based on fusion operators rule(without regex), - only strictly equals of op name list means matched - :Param event_dataset: dataset of timeline event - :Param ops: operator combination with '-' as separator , e.g. 
permute-reshape - :Param npu_api: api of torch_npu, generally more efficient than torch api - :Param mode: aten or dequeue or optimizer - """ - op_list = ops.split(const.OP_SEP) - - matched_op_index = set() - api_ops_matched = False - - for index, event in enumerate(getattr(event_dataset, mode)): - if self._replace_op_name_prefix(event.name, mode) != op_list[0]: - continue - tmp_dequeue_event_names = [self._replace_op_name_prefix(event.name, mode) for event in - getattr(event_dataset, mode)[index: index + len(op_list)]] - if tmp_dequeue_event_names != op_list: - continue - api_ops_matched = True - matched_op_index.add(event.dataset_index) - - if api_ops_matched: - self._matched_op_index[npu_api + f":{ops}"] = matched_op_index - - def _match_ops_with_regex(self, event_dataset: TimelineEventDataset, op_rule_pattern: str, npu_api: str, - mode: str): - """ match operator based on fusion operators rule(with regex), - using regex to support condition like 'a = torch.mul(xxx) if xxx else torch.add(xxx)' - :Param event_dataset: dataset of timeline event - :Param op_rule_pattern: fusion operators rule with regex definition , e.g. add-mul{0,10}, add-mul* - :Param npu_api: api of torch_npu, generally more efficient than torch api - :Param mode: aten or dequeue or optimizer - """ - matched_op_index = set() - total_op_name = "".join([f"{const.OP_SEP}{self._replace_op_name_prefix(event.name, mode)}{const.OP_SEP}" - for event in - getattr(event_dataset, mode)]) - - matched_pattern_index_tuple = [(x.start(0), x.end(0)) for x in re.finditer(op_rule_pattern, total_op_name)] - # convert list of index tuple to a whole list: [(3, 25), ...] -> [3, 25, ...] 
- total_ops_split_points = [num for sublist in matched_pattern_index_tuple for num in sublist] - - api_ops_matched = len(total_ops_split_points) != 0 - - op_index = [] - if 0 not in total_ops_split_points: - total_ops_split_points = [0] + total_ops_split_points - if len(list(total_op_name)) not in total_ops_split_points: - total_ops_split_points.append(len(list(total_op_name))) - - # convert total ops name like "-add-mul-xxx-div-" to small pieces like [["add", "mul"], [...], ["div"]] - # by the regex index and then calculate the real index for matched fusion operators in event dataset - for l, r in zip(total_ops_split_points, total_ops_split_points[1:]): - matched_op_flag = True if (l, r) in matched_pattern_index_tuple else False - matched_ops_list = total_op_name[l: r].strip(const.OP_SEP).split(const.OP_SEP + const.OP_SEP) - op_index.append([matched_op_flag, len(matched_ops_list)]) - for i, _ in enumerate(op_index): - if i > 0: - # calculate cumsum for indexing matched operator - op_index[i][1] = op_index[i][1] + op_index[i - 1][1] - op_index = [[False, 0]] + op_index - - for i, _ in enumerate(op_index): - if not op_index[i][0]: - continue - index = op_index[i - 1][1] - matched_op_index.add(index) - - if index > len(getattr(event_dataset, mode)) - 1: - continue - dataset_index = getattr(event_dataset, mode)[index].get("dataset_index") - matched_op_index.add(dataset_index) - - if api_ops_matched: - self._matched_op_index[npu_api + f":{op_rule_pattern}"] = sorted(list(matched_op_index)) - - def make_record(self): - """ - make record for what and how to optimize - """ - if not self.matched_op_stacks: - return - - desc = f"Found {len(format_timeline_result(self.matched_op_stacks))} apis to be replaced" \ - f" based on the runtime env cann-{self.cann_version} and torch-{self.torch_version}" - suggestion = "Please replace training api according to sub table 'Affinity training api'" - if self.empty_stacks: - desc += ", but with no stack" - suggestion = 
const.TIMELINE_EMPTY_STACKS_PROMPT.format( - timeline_profiling_doc_url=const.TIMELINE_WITH_STACK_DOC_URL - ) - - optimization_item = OptimizeItem( - const.AFFINITY_TRAINING_API, - desc, - [suggestion] - ) - - self.result.add(OptimizeRecord(optimization_item)) - - record_title = ["Affinity API", "Code stacks", "Stack called counts"] - self.result.add_detail(const.AFFINITY_TRAINING_API, headers=record_title) - - for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items(): - if not stacks_info: - detail = [api_name, "null", "null"] - self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) - else: - for stack in stacks_info: - detail = [api_name, *stack] - self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) - - def make_render(self): - format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True) - - self.html_render.render_template(key="scheduling", - template_dir="templates", - template_name="affinity_api.html", - cann_version=self.cann_version, - torch_version=self.torch_version, - empty_stacks=self.empty_stacks, - with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, - api_doc_url=const.TIMELINE_API_DOC_URL, - result=format_result_for_html) - - def query_stack(self, event_dataset: TimelineEventDataset): - if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): - return - - op_stack_list = event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) - for op_stack in op_stack_list: - for op_rule, stack in op_stack.items(): - if op_rule not in self.matched_op_stacks: - self.matched_op_stacks[op_rule] = {} - if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: - continue - if stack not in self.matched_op_stacks[op_rule]: - self.matched_op_stacks[op_rule][stack] = 0 - self.matched_op_stacks[op_rule][stack] += 1 - - def _query_stack_by_matched_index(self, index, event): - stack_record = {} - event = TimelineEvent(event) - - matched_op_rules = 
[] - for op_rule, matched_index in self._matched_op_index.items(): - if index not in matched_index: - continue - - matched_op_rules.append(op_rule) - stack = event.args.get(const.CALL_STACKS) - - if not stack: - logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) - continue - - if self.empty_stacks and stack: - self.empty_stacks = False - - stack_record[op_rule] = stack - - if matched_op_rules and not stack_record: - for op_rule in matched_op_rules: - stack_record[op_rule] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG - - return stack_record - - def _replace_op_name_prefix(self, event_name, mode): - if mode == const.DEQUEUE.lower(): - op_name_prefix = f"{const.DEQUEUE}{const.DEQUEUE_SEP}" - elif mode == const.ATEN: - op_name_prefix = f"{const.ATEN}{const.ATEN_SEP}" - else: - op_name_prefix = f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}" - - return event_name.replace(op_name_prefix, "") - - def _format_rule_to_pattern(self, op_rule): - """ - Args: - op_rule: like (mul){0,1}-(add|neg){0,2}-dropout-(softmax)* - - Returns: op_pattern like (-mul-){0,1}(-add-|-neg-){0,2}(-dropout-)(-softmax-)* - """ - enable_regex = False - if "(" not in op_rule and ")" not in op_rule: - # op_rule which requires fuzzy matching mush consist of "()" - return op_rule, enable_regex - - enable_regex = True - op_pattern_list = op_rule.split(const.OP_SEP) - format_op_pattern = "" - for op_pattern in op_pattern_list: - matched_res = re.search(r'\((.*?)\)', op_pattern) - - ops_index_range = (matched_res.start() + 1, matched_res.end() - 1) if matched_res else ( - 0, len(op_pattern)) - - op_names = op_pattern[ops_index_range[0]: ops_index_range[1]] - tmp_op_names_record = [] - for op_name in op_names.split("|"): - tmp_op_names_record.append(f"{const.OP_SEP}{op_name.strip(' ')}{const.OP_SEP}") - op_suffix = op_pattern[ops_index_range[1] + 1:] - op_names_format = f"({'|'.join(tmp_op_names_record)}){op_suffix}" - - format_op_pattern += op_names_format - return 
format_op_pattern, enable_regex +import multiprocessing +import logging +import re + +from tqdm import tqdm + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant as const +from profiler.advisor.common.timeline.event import TimelineEvent +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord +from profiler.advisor.utils.utils import format_timeline_result +from profiler.advisor.common.timeline.fusion_ops_db import init_timeline_ops_db + +logger = logging.getLogger() + + +class TimelineFusionOpsAnalyzer(BaseAnalyzer): + dataset_cls_list = [TimelineEventDataset] + + def __init__(self, collection_path, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, + torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): + super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + self._matched_op_index = {} if self.n_processes <= 1 else multiprocessing.Manager().dict() + self.matched_op_stacks = {} + self.empty_stacks = True + key = TimelineEventDataset.get_key() + self.timeline_event_dataset = self.get_first_data_by_key(self.dataset_list, key) + + def optimize(self, **kwargs): + for mode in [const.ATEN.lower(), const.OPTIMIZER.lower()]: + + for op_combined, npu_apis in tqdm(getattr(init_timeline_ops_db(self.cann_version, self.torch_version), + f"_{mode}_op_api_map").items(), leave=False, ncols=100, + desc="Scanning timeline for affinity apis"): + for npu_api in npu_apis.split("/"): + self.find_fusion_ops(self.timeline_event_dataset, op_combined, npu_api, mode) + + self.query_stack(self.timeline_event_dataset) + + logger.info("Finish timeline analysis") + self.make_record() + self.make_render() + return self.result + + def find_fusion_ops(self, event_dataset, ops: str, npu_api: str, mode: str): + """ + :Param event_dataset: dataset of timeline event + :Param ops: operator combination 
with '-' as separator , e.g. permute-reshape + :Param npu_api: api of torch_npu, generally more efficient than torch api + :Param mode: aten or dequeue or optimizer + :Return: json of op_name and called times and detail stacks + """ + op_rule_pattern, enable_regex = self._format_rule_to_pattern(ops) + if not enable_regex: + self._match_ops(event_dataset, op_rule_pattern, npu_api, mode) + else: + try: + self._match_ops_with_regex(event_dataset, op_rule_pattern, npu_api, mode) + except Exception as e: + logger.warning("Failed to find fusion operators with regex %s, reason is %s", ops, e) + + def _match_ops(self, event_dataset, ops: str, npu_api: str, mode: str): + """ match operator based on fusion operators rule(without regex), + only strictly equals of op name list means matched + :Param event_dataset: dataset of timeline event + :Param ops: operator combination with '-' as separator , e.g. permute-reshape + :Param npu_api: api of torch_npu, generally more efficient than torch api + :Param mode: aten or dequeue or optimizer + """ + op_list = ops.split(const.OP_SEP) + + matched_op_index = set() + api_ops_matched = False + + for index, event in enumerate(getattr(event_dataset, mode)): + if self._replace_op_name_prefix(event.name, mode) != op_list[0]: + continue + tmp_dequeue_event_names = [self._replace_op_name_prefix(event.name, mode) for event in + getattr(event_dataset, mode)[index: index + len(op_list)]] + if tmp_dequeue_event_names != op_list: + continue + api_ops_matched = True + matched_op_index.add(event.dataset_index) + + if api_ops_matched: + self._matched_op_index[npu_api + f":{ops}"] = matched_op_index + + def _match_ops_with_regex(self, event_dataset, op_rule_pattern: str, npu_api: str, + mode: str): + """ match operator based on fusion operators rule(with regex), + using regex to support condition like 'a = torch.mul(xxx) if xxx else torch.add(xxx)' + :Param event_dataset: dataset of timeline event + :Param op_rule_pattern: fusion operators rule with 
regex definition , e.g. add-mul{0,10}, add-mul* + :Param npu_api: api of torch_npu, generally more efficient than torch api + :Param mode: aten or dequeue or optimizer + """ + matched_op_index = set() + total_op_name = "".join([f"{const.OP_SEP}{self._replace_op_name_prefix(event.name, mode)}{const.OP_SEP}" + for event in + getattr(event_dataset, mode)]) + + matched_pattern_index_tuple = [(x.start(0), x.end(0)) for x in re.finditer(op_rule_pattern, total_op_name)] + # convert list of index tuple to a whole list: [(3, 25), ...] -> [3, 25, ...] + total_ops_split_points = [num for sublist in matched_pattern_index_tuple for num in sublist] + + api_ops_matched = len(total_ops_split_points) != 0 + + op_index = [] + if 0 not in total_ops_split_points: + total_ops_split_points = [0] + total_ops_split_points + if len(list(total_op_name)) not in total_ops_split_points: + total_ops_split_points.append(len(list(total_op_name))) + + # convert total ops name like "-add-mul-xxx-div-" to small pieces like [["add", "mul"], [...], ["div"]] + # by the regex index and then calculate the real index for matched fusion operators in event dataset + for l, r in zip(total_ops_split_points, total_ops_split_points[1:]): + matched_op_flag = True if (l, r) in matched_pattern_index_tuple else False + matched_ops_list = total_op_name[l: r].strip(const.OP_SEP).split(const.OP_SEP + const.OP_SEP) + op_index.append([matched_op_flag, len(matched_ops_list)]) + for i, _ in enumerate(op_index): + if i > 0: + # calculate cumsum for indexing matched operator + op_index[i][1] = op_index[i][1] + op_index[i - 1][1] + op_index = [[False, 0]] + op_index + + for i, _ in enumerate(op_index): + if not op_index[i][0]: + continue + index = op_index[i - 1][1] + matched_op_index.add(index) + + if index > len(getattr(event_dataset, mode)) - 1: + continue + dataset_index = getattr(event_dataset, mode)[index].get("dataset_index") + matched_op_index.add(dataset_index) + + if api_ops_matched: + 
self._matched_op_index[npu_api + f":{op_rule_pattern}"] = sorted(list(matched_op_index)) + + def make_record(self): + """ + make record for what and how to optimize + """ + if not self.matched_op_stacks: + return + + desc = f"Found {len(format_timeline_result(self.matched_op_stacks))} apis to be replaced" \ + f" based on the runtime env cann-{self.cann_version} and torch-{self.torch_version}" + suggestion = "Please replace training api according to sub table 'Affinity training api'" + if self.empty_stacks: + desc += ", but with no stack" + suggestion = const.TIMELINE_EMPTY_STACKS_PROMPT.format( + timeline_profiling_doc_url=const.TIMELINE_WITH_STACK_DOC_URL + ) + + optimization_item = OptimizeItem( + const.AFFINITY_TRAINING_API, + desc, + [suggestion] + ) + + self.result.add(OptimizeRecord(optimization_item)) + + record_title = ["Affinity API", "Code stacks", "Stack called counts"] + self.result.add_detail(const.AFFINITY_TRAINING_API, headers=record_title) + + for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items(): + if not stacks_info: + detail = [api_name, "null", "null"] + self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) + else: + for stack in stacks_info: + detail = [api_name, *stack] + self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) + + def make_render(self): + format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True) + + self.html_render.render_template(key="schedule", + template_dir="templates", + template_name="affinity_api.html", + cann_version=self.cann_version, + torch_version=self.torch_version, + empty_stacks=self.empty_stacks, + with_stack_doc_url=const.TIMELINE_WITH_STACK_DOC_URL, + api_doc_url=const.TIMELINE_API_DOC_URL, + result=format_result_for_html) + + def query_stack(self, event_dataset): + if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]): + return + + op_stack_list = 
event_dataset.parse_data_with_generator(self._query_stack_by_matched_index) + for op_stack in op_stack_list: + for op_rule, stack in op_stack.items(): + if op_rule not in self.matched_op_stacks: + self.matched_op_stacks[op_rule] = {} + if stack == const.TIMELINE_FUSION_OPS_NO_STACK_FLAG: + continue + if stack not in self.matched_op_stacks[op_rule]: + self.matched_op_stacks[op_rule][stack] = 0 + self.matched_op_stacks[op_rule][stack] += 1 + + def _query_stack_by_matched_index(self, index, event): + stack_record = {} + event = TimelineEvent(event) + + matched_op_rules = [] + for op_rule, matched_index in self._matched_op_index.items(): + if index not in matched_index: + continue + + matched_op_rules.append(op_rule) + stack = event.args.get(const.CALL_STACKS) + + if not stack: + logger.debug("Got empty '%s' for event %s", const.CALL_STACKS, event) + continue + + if self.empty_stacks and stack: + self.empty_stacks = False + + stack_record[op_rule] = stack + + if matched_op_rules and not stack_record: + for op_rule in matched_op_rules: + stack_record[op_rule] = const.TIMELINE_FUSION_OPS_NO_STACK_FLAG + + return stack_record + + def _replace_op_name_prefix(self, event_name, mode): + if mode == const.DEQUEUE.lower(): + op_name_prefix = f"{const.DEQUEUE}{const.DEQUEUE_SEP}" + elif mode == const.ATEN: + op_name_prefix = f"{const.ATEN}{const.ATEN_SEP}" + else: + op_name_prefix = f"{const.OPTIMIZER}.{const.OPTIMIZER_STEP}{const.OPTIMIZER_SEP}" + + return event_name.replace(op_name_prefix, "") + + def _format_rule_to_pattern(self, op_rule): + """ + Args: + op_rule: like (mul){0,1}-(add|neg){0,2}-dropout-(softmax)* + + Returns: op_pattern like (-mul-){0,1}(-add-|-neg-){0,2}(-dropout-)(-softmax-)* + """ + enable_regex = False + if "(" not in op_rule and ")" not in op_rule: + # op_rule which requires fuzzy matching mush consist of "()" + return op_rule, enable_regex + + enable_regex = True + op_pattern_list = op_rule.split(const.OP_SEP) + format_op_pattern = "" + for op_pattern 
in op_pattern_list: + matched_res = re.search(r'\((.*?)\)', op_pattern) + + ops_index_range = (matched_res.start() + 1, matched_res.end() - 1) if matched_res else ( + 0, len(op_pattern)) + + op_names = op_pattern[ops_index_range[0]: ops_index_range[1]] + tmp_op_names_record = [] + for op_name in op_names.split("|"): + tmp_op_names_record.append(f"{const.OP_SEP}{op_name.strip(' ')}{const.OP_SEP}") + op_suffix = op_pattern[ops_index_range[1] + 1:] + op_names_format = f"({'|'.join(tmp_op_names_record)}){op_suffix}" + + format_op_pattern += op_names_format + return format_op_pattern, enable_regex diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index df12fd76d3..664753c724 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -56,8 +56,8 @@ CANN_VERSION_C13 = '7.0.RC1' CANN_VERSION_C15 = '7.0.0' CANN_VERSION_C17 = '8.0.0' SUPPORTED_CANN_VERSION = [CANN_VERSION_C30, CANN_VERSION_C13, CANN_VERSION_C15, CANN_VERSION_C17] -DEFAULT_CANN_VERSION = CANN_VERSION_C15 -ASCEND_PYTORCH_PROFILER = "ascend_pytorch_proflier" +DEFAULT_CANN_VERSION = CANN_VERSION_C17 +ASCEND_PYTORCH_PROFILER = "ascend_pytorch_profiler" MSLITE = "mslite" MSPROF = "msprof" SUPPORTED_PROFILING_TYPE = [ASCEND_PYTORCH_PROFILER, MSLITE, MSPROF] @@ -83,12 +83,20 @@ ADVISOR_LOG_LEVEL = "ADVISOR_LOG_LEVEL" DEFAULT_LOG_LEVEL = "INFO" SUPPORTED_LOG_LEVEL = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] +RULE_BUCKET = "RULE-BUCKET" CLOUD_RULE_REGION_CN_NORTH_9 = "cn-north-9" CLOUD_RULE_REGION_CN_NORTH_7 = "cn-north-7" CLOUD_RULE_REGION_CN_SOUTHWEST_2 = "cn-southwest-2" CLOUD_RULE_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7, CLOUD_RULE_REGION_CN_NORTH_9, CLOUD_RULE_REGION_CN_SOUTHWEST_2] +INNER_REGION_LIST = [CLOUD_RULE_REGION_CN_NORTH_7] DEFAULT_CLOUD_RULE_REGION = CLOUD_RULE_REGION_CN_SOUTHWEST_2 +HTTP_PREFIXES = "http://" +HTTPS_PREFIXES = "https://" +COMMON_YAML_DIR = "modelarts/solution/ma_advisor_rules/" +COMMON_ENDPOINT_SUFFIX = 
"obs.{}.myhuaweicloud.com" +INNER_ENDPOINT_SUFFIX= "obs.{}.ulanqab.huawei.com" + AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" FUSSION_PASS_YAML_NAME = "op_fussion_pass.yaml" TIMELINE_FUSION_OPS_YAML_NAME = "timeline_fusion_ops.yaml" diff --git a/profiler/advisor/common/timeline/fusion_ops_db.py b/profiler/advisor/common/timeline/fusion_ops_db.py index 19a86437e0..f37cfe50d1 100644 --- a/profiler/advisor/common/timeline/fusion_ops_db.py +++ b/profiler/advisor/common/timeline/fusion_ops_db.py @@ -1,330 +1,44 @@ -import copy import logging import os import yaml -from profiler.advisor.common import constant as const +from profiler.advisor.common import constant +from profiler.advisor.common.timeline.fusion_ops_rule import OpRule +from profiler.advisor.common.timeline.fusion_ops_rule_handler import TimelineOpRuleHandler from profiler.advisor.utils.log import get_log_level +from profiler.advisor.utils.utils import get_file_path_by_walk logger = logging.getLogger() logger.setLevel(get_log_level()) -class TimelineOpRuleHandler: - """基于线性规划思想保存OpRule,用于局部继承、全局继承等功能""" - - def __init__(self): - self._db_content = None - # 具体生成的timeline规则,key为unique_id - self._all_tmp_timeline_op_rule = {} - # 所有timeline规则的dict集合,key为unique_id - self._all_origin_timeline_op_rule_dict = {} - # 已生成timeline规则的id数组 - self._exist_timeline_op_rule_unique_id_list = [] - - @staticmethod - def _get_local_inherit_id_list(op_rule: dict): - local_inherit_id_list = [] - for _, val in op_rule.items(): - if val.get("inherit_unique_id") is not None: - local_inherit_id_list.append(val.get("inherit_unique_id")) - return local_inherit_id_list - - @staticmethod - def _is_duplicated_element_in_lists(list_a, list_b): - """检查两个数组中是否存在重复的元素,若有任意元素重复,返回True""" - if not isinstance(list_a, list): - list_a = [list_a] - if not isinstance(list_b, list): - list_b = [list_b] - for element in list_a: - if element in list_b: - return True - return False - - def set_db_content(self, db_content): - # 过滤非 dict 格式, 或 dict 中没有定义 
unique_id 的数据, 并保存到 _all_origin_timeline_op_rule_dict 中 - self._db_content = copy.deepcopy(db_content) - for rule_dic in self._db_content: - if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: - continue - self._all_origin_timeline_op_rule_dict[rule_dic.get("unique_id")] = rule_dic - if self._all_origin_timeline_op_rule_dict: - self.generate_all_timeline_op_rule() - - def generate_basic_timeline_op_rules(self): - """用于实现获取无全局继承规则, 无全局继承的规则认为是基础版本规则, 默认不会存在局部继承""" - for _, rule_dic in self._all_origin_timeline_op_rule_dict.items(): - if rule_dic.get("inherit_unique_id") is None: - self.add_basic_timeline_op_rule(rule_dic) - - def add_basic_timeline_op_rule(self, rule_dic): - # 若基础规则中存在局部继承的规则,则跳过 - local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) - if local_inherit_id_list: - return - - temp_rule = OpRule() - temp_rule.merge(rule_dic.get("operator_rules")) - - unique_id = rule_dic.get("unique_id") - logger.debug("The rule of version %s is basic rule.", unique_id) - self.add_new_timeline_op_rule(unique_id, temp_rule.tmp_rule) - - def add_empty_timeline_op_rule(self, unique_id): - if self._all_origin_timeline_op_rule_dict.get(unique_id) is None: - self._all_origin_timeline_op_rule_dict[unique_id] = {} - tmp_rule = {} - logger.debug("The rule of version %s is empty.", unique_id) - self.add_new_timeline_op_rule(unique_id, tmp_rule) - - def add_new_timeline_op_rule(self, unique_id, tmp_rule): - if unique_id not in self._exist_timeline_op_rule_unique_id_list: - self._exist_timeline_op_rule_unique_id_list.append(unique_id) - self._all_tmp_timeline_op_rule[unique_id] = tmp_rule - logger.debug("The rule of version %s is successfully generated.", unique_id) - - def generate_specified_list_timeline_op_rule(self, specified_unique_id_list, kid_id_list=None): - for specified_unique_id in specified_unique_id_list: - if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: - 
self.generate_specified_timeline_op_rule(specified_unique_id, kid_id_list) - - def generate_specified_timeline_op_rule(self, specified_unique_id, kid_id_list=None): - """用于实现生成特定版本规则 - - 若不存在相应specified_unique_id的规则、或是已生成、循环继承等情况,将该规则置空并返回 - 规则库文件结构设置为多叉树, 结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成, - 直到specified_unique_id规则依赖继承的规则库全部生成完毕, 再生成该指定规则库, 将specified_unique_id的规则库归档 - - 参数: - specified_unique_id: 指定版本规则id - kid_id_list: 子规则id数组, 用于防止循环继承, 如间接继承自身或直接继承自身等情况 - 返回: - None - """ - if kid_id_list is None: - kid_id_list = [] - - # 若该unique_id规则在timeline_fusion_ops.yaml中没有相应的规则, 生成该id规则,置为空 - if self._all_origin_timeline_op_rule_dict.get(specified_unique_id) is None: - logger.warning("The specified version %s does not exist in the rule library. " - "Ensure that the corresponding rule is configured in the YAML file. " - "The version %s is left blank.", - specified_unique_id, - specified_unique_id) - self.add_empty_timeline_op_rule(specified_unique_id) - return - - # 若该unique_id规则已经生成,则无需再次生成 - if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: - logger.warning("The rule has been generated and does not need to be generated again. " - "Check whether unique id %s in the YAML file is duplicate.", - specified_unique_id) - return - - # 若kid_id_list不为空,且间接继承自身,则尝试生成空规则用于继承 - if kid_id_list and self._is_duplicated_element_in_lists(specified_unique_id, kid_id_list): - logger.warning("It cannot be inherited indirectly. 
Ensure that the corresponding rules are correctly " - "configured in the YAML file and leave Version %s blank.", - specified_unique_id) - self.add_empty_timeline_op_rule(specified_unique_id) - return - - rule_dic = self._all_origin_timeline_op_rule_dict.get(specified_unique_id) - if rule_dic is not None: - kid_id_list.append(specified_unique_id) - - global_inherit_id = rule_dic.get("inherit_unique_id") - if global_inherit_id and global_inherit_id not in self._exist_timeline_op_rule_unique_id_list: - logger.debug("The rule of version %s global inherit the rule of version %s", - specified_unique_id, global_inherit_id) - self.generate_specified_timeline_op_rule(global_inherit_id, kid_id_list) - - # 若局部继承的规则未生成, 生成该规则 - local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) - if local_inherit_id_list: - logger.debug("The rule of version %s local inherit the rule of version %s", - specified_unique_id, local_inherit_id_list) - self.generate_specified_list_timeline_op_rule(specified_unique_id_list=local_inherit_id_list, - kid_id_list=kid_id_list) - logger.debug("Start to generate rule of version %s", specified_unique_id) - # 实现全局继承与局部继承 - temp_rule = OpRule(timeline_op_rule_handler=self, - rule=self._all_tmp_timeline_op_rule.get(global_inherit_id)) - temp_rule.merge(rule_dic.get("operator_rules")) - # 将生成的规则归档保存 - self.add_new_timeline_op_rule(specified_unique_id, temp_rule.tmp_rule) - return - logger.error("Failed to generate the rule whose unique_id is %s. 
Ensure that the rule is configured in " - "the YAML file and the version %s is empty.", specified_unique_id, specified_unique_id) - self.add_empty_timeline_op_rule(specified_unique_id) - - def generate_all_timeline_op_rule(self): - """用于实现获取所有版本规则 - - 查找db_content中的规则库, 规则库文件结构设置为多叉树, 优先生成无继承的基础规则版本 - 循环并生成其他版本, 文件结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成,直到全部规则库生成后退出函数 - - 参数: - None - 返回: - None - """ - self.generate_basic_timeline_op_rules() - _unique_id_list = copy.deepcopy(list(self._all_origin_timeline_op_rule_dict.keys())) - for unique_id in _unique_id_list: - if unique_id in self._exist_timeline_op_rule_unique_id_list: - continue - self.generate_specified_timeline_op_rule(unique_id) - - def get_tmp_timeline_op_rule_with_unique_id(self, unique_id): - if unique_id not in self._exist_timeline_op_rule_unique_id_list: - logger.error("The specified unique_id does not exist in the rule library. Ensure that the " - "corresponding rule is configured in the YAML file and the version %s is empty." - "If the value of unique_id is a negative number, the version may not be supported.", - unique_id) - self.add_empty_timeline_op_rule(unique_id) - if unique_id < 0: - logger.error("Advise to use a positive integer as the unique id of rules. " - "Negative numbers: %s are not recommended to use as unique id. 
" - "If specified invalid unique id: %s is used, an empty rule is returned by default.", - unique_id, const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) - return self._all_tmp_timeline_op_rule.get(unique_id) - - -class OpRule: - - def __init__(self, rule=None, timeline_op_rule_handler=None): - if rule is None: - self._tmp_rule = {} - else: - self._tmp_rule = copy.deepcopy(rule) - if timeline_op_rule_handler is None: - self.timeline_op_rule_handler = {} - else: - self.timeline_op_rule_handler = copy.deepcopy(timeline_op_rule_handler) - self._rule = {} - - @property - def tmp_rule(self): - return self._tmp_rule - - @staticmethod - def _format_rule(rule): - """格式化规则函数, 将额外规则格式化为{key,数组list}形式, 使得yaml文件中operator_rules若写成key:str形式也能正常读取""" - format_rule = {} - for key, val in rule.items(): - if not isinstance(val, list): - val = [val] - format_rule[key] = val - return format_rule - - def merge(self, extra_rule): - """合并函数, 将已有规则库与额外规则合并, 若无继承则已有规则库应为空""" - for key, val in extra_rule.items(): - for func, op_rules in val.items(): - try: - getattr(self, f"{func}")(key, op_rules) - except AttributeError: - logger.error("Undefined field and function name. 
Ensure that %s is correct in the rule " - "library.", func) - - def get_final_rules(self): - """获取最终的规则库""" - self._restore_rule() - return self._rule - - def add(self, key, add_rules: dict): - """新增函数, 新增已有规则库不存在的额外规则""" - if add_rules is None: - return - if self._tmp_rule.get(key) is None: - self._tmp_rule[key] = {} - format_add_rule = self._format_rule(add_rules) - for add_key, add_val in format_add_rule.items(): - logger.debug("add: %s: %s", add_key, add_val) - if add_key not in self._tmp_rule: - self._tmp_rule[key][add_key] = add_val - else: - logger.warning("This key has been written to the rule, " - "%s: %s should be written in the overwrite section", add_key, add_val) - self._tmp_rule[key][add_key].update(add_val) - - def overwrite(self, key, overwrite_rules: dict): - """重写函数, 重写已有规则库中已经存在的规则""" - if overwrite_rules is None: - return - if self._tmp_rule.get(key) is None: - self._tmp_rule[key] = {} - format_overwrite_rules = self._format_rule(overwrite_rules) - for overwrite_key, overwrite_val in format_overwrite_rules.items(): - logger.debug("overwrite: %s: %s", overwrite_key, overwrite_val) - if overwrite_key not in self._tmp_rule: - logger.warning("This key is not written to the rule. " - "%s: %s should be written in the add section", overwrite_key, overwrite_val) - self._tmp_rule[key][overwrite_key] = overwrite_val - else: - self._tmp_rule[key][overwrite_key].update(overwrite_val) - - def exclude(self, key, exclude_rules: list): - """除外函数, 将已有规则库已有的规则除外删除""" - if exclude_rules is None: - return - for exclude_key in exclude_rules: - logger.debug("exclude: %s", exclude_key) - if isinstance(exclude_key, str): - if exclude_key not in self._tmp_rule[key]: - logger.warning("This key is not written to the rule. 
" - "do not need to exclude: %s.", exclude_key) - continue - self._tmp_rule[key].pop(exclude_key) - else: - logger.warning("Error type rule in exclude: %s", exclude_key) - - def inherit_unique_id(self, key, inherit_unique_id): - """局部继承函数, 将规则库中指定unique_id版本覆盖指定位置""" - result_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(inherit_unique_id) - if result_rule is not None and result_rule.get(key) is not None: - self._tmp_rule[key] = copy.deepcopy(result_rule.get(key)) - return - logger.error("Rule library version %s does not exist. ", inherit_unique_id) - - def _restore_rule(self): - for key, op_api_map in self._tmp_rule.items(): - self._rule[key] = [{op_combined: api} for op_combined, api in op_api_map.items()] - +def init_timeline_ops_db(cann_version=None, torch_version=None): + logger.debug("init operators database") -def get_file_path_by_walk(root, filename): - file_path = "" - for root, _, files in os.walk(root, topdown=True): - for name in files: - if name == filename: - file_path = os.path.join(root, name) - return file_path - return file_path + return FusionOperatorDB(cann_version=cann_version, torch_version=torch_version) def get_timeline_fusion_ops_yaml_path(): # 环境变量 ADVISOR_RULE_PATH 不为空且该路径存在, os.walk遍历其下文件, 若存在相应的规则文件则返回路径 - advisor_rule_path = os.getenv(const.ADVISOR_RULE_PATH) + advisor_rule_path = os.getenv(constant.ADVISOR_RULE_PATH) if advisor_rule_path and os.path.exists(advisor_rule_path): - specified_file_path = get_file_path_by_walk(advisor_rule_path, const.TIMELINE_FUSION_OPS_YAML_NAME) + specified_file_path = get_file_path_by_walk(advisor_rule_path, constant.TIMELINE_FUSION_OPS_YAML_NAME) if len(specified_file_path.strip()) and os.path.exists(specified_file_path): logger.debug("Successfully find The %s file which is specified by the environment variable: %s.", - specified_file_path, const.ADVISOR_RULE_PATH) + specified_file_path, constant.ADVISOR_RULE_PATH) return specified_file_path logger.warning("The %s does 
not exist in path: %s. Try to use cloud or default local YAML file.", - const.TIMELINE_FUSION_OPS_YAML_NAME, os.path.normpath(advisor_rule_path)) + constant.TIMELINE_FUSION_OPS_YAML_NAME, os.path.normpath(advisor_rule_path)) # 检查云文件默认保存路径文件夹下是否存在相应文件, 默认路径 ~/rules/cloud/ - cloud_file_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH, const.TIMELINE_FUSION_OPS_YAML_NAME) + cloud_file_path = os.path.join(os.path.expanduser("~"), constant.CLOUD_RULE_PATH, constant.TIMELINE_FUSION_OPS_YAML_NAME) if os.path.exists(cloud_file_path): - logger.debug("Successfully find The cloud %s file in %s.", const.TIMELINE_FUSION_OPS_YAML_NAME, + logger.debug("Successfully find The cloud %s file in %s.", constant.TIMELINE_FUSION_OPS_YAML_NAME, cloud_file_path) return cloud_file_path # 检查本地默认文件 local_file_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), - const.DEFAULT_RULE_PATH, const.TIMELINE_FUSION_OPS_YAML_NAME) + constant.DEFAULT_RULE_PATH, constant.TIMELINE_FUSION_OPS_YAML_NAME) if not os.path.exists(local_file_path): # 若本地默认文件不存在, 则log异常信息并 logger.error("The default local YAML file does not exist. 
Please check the YAML file in the default path %s.", @@ -337,8 +51,8 @@ class FusionOperatorDB: def __init__(self, file_path=None, cann_version=None, torch_version=None): self.timeline_fusion_ops_yaml_path = os.path.normpath(get_timeline_fusion_ops_yaml_path()) - self.cann_version = cann_version or const.DEFAULT_CANN_VERSION - self.torch_version = torch_version or const.DEFAULT_TORCH_VERSION + self.cann_version = cann_version or constant.DEFAULT_CANN_VERSION + self.torch_version = torch_version or constant.DEFAULT_TORCH_VERSION self._supported_version_dict = {} @@ -379,9 +93,9 @@ class FusionOperatorDB: return self._optimizer_op_api_map def get_fusion_operator_with_unique_id(self, unique_id): - if unique_id == const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID: + if unique_id == constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID: logger.warning("The specified unique id: %s is invalid.Please check whether the rule of the unique id " - "exists and modify the rule.", const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + "exists and modify the rule.", constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) return {} result_tmp_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(unique_id) result_op_rule = OpRule(result_tmp_rule) @@ -411,7 +125,7 @@ class FusionOperatorDB: def _is_version_supported(self, db_content): """校验当前版本是否被规则库中的版本支持, 保存版本支持信息数组, 按数组或字符串的可变方式保存""" - if db_content is None : + if db_content is None: logger.warning( "The rule library is empty. 
Check the rule library file: %s", self.timeline_fusion_ops_yaml_path @@ -455,18 +169,18 @@ class FusionOperatorDB: def _is_version_supported_in_supported_version_dict(self, cann_version=None, torch_version=None): """校验当前版本是否存在在规则库中的版本支持字典中""" for _, supported_version in self._supported_version_dict.items(): - if self._is_version_supported_in_version(supported_version, cann_version, torch_version): + if self._is_version_supported_in_versions(supported_version, cann_version, torch_version): return True return False def _get_unique_id_in_supported_version_dict(self, cann_version=None, torch_version=None) -> int: """校验当前版本是否存在在规则库中的版本支持字典中, 在使用前请检查是否支持该版本""" for key_unique_id, supported_version in self._supported_version_dict.items(): - if self._is_version_supported_in_version(supported_version, cann_version, torch_version): + if self._is_version_supported_in_versions(supported_version, cann_version, torch_version): return key_unique_id - return const.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID + return constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID - def _is_version_supported_in_version(self, supported_version, cann_version=None, torch_version=None): + def _is_version_supported_in_versions(self, supported_version, cann_version=None, torch_version=None): """校验当前cann版本和torch版本是否存在在规则库中的版本支持数组的元素中""" cann_version_list = supported_version[0] if not isinstance(cann_version_list, list): @@ -485,9 +199,9 @@ class FusionOperatorDB: def _parse_db(self): """生成输出的规则库""" - self._parse(const.ATEN) - self._parse(const.DEQUEUE) - self._parse(const.OPTIMIZER) + self._parse(constant.ATEN) + self._parse(constant.DEQUEUE) + self._parse(constant.OPTIMIZER) def _parse(self, mode): """生成输出的规则库中指定部分, 如aten, Optimizer等""" @@ -521,7 +235,7 @@ class FusionOperatorDB: if not os.path.exists(file_path): logger.warning("Path: '%s' does not exist, please specific existed path of " "fusion operators yaml file by setting env '%s'", - os.path.abspath(file_path), const.ADVISOR_RULE_PATH) + 
os.path.abspath(file_path), constant.ADVISOR_RULE_PATH) self.is_empty = True return {} diff --git a/profiler/advisor/common/timeline/fusion_ops_rule.py b/profiler/advisor/common/timeline/fusion_ops_rule.py new file mode 100644 index 0000000000..deee68edb9 --- /dev/null +++ b/profiler/advisor/common/timeline/fusion_ops_rule.py @@ -0,0 +1,110 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. +import copy +import logging + +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class OpRule: + + def __init__(self, rule=None, timeline_op_rule_handler=None): + if rule is None: + self._tmp_rule = {} + else: + self._tmp_rule = copy.deepcopy(rule) + if timeline_op_rule_handler is None: + self.timeline_op_rule_handler = {} + else: + self.timeline_op_rule_handler = copy.deepcopy(timeline_op_rule_handler) + self._rule = {} + + @property + def tmp_rule(self): + return self._tmp_rule + + @staticmethod + def _format_rule(rule): + """格式化规则函数, 将额外规则格式化为{key,数组list}形式, 使得yaml文件中operator_rules若写成key:str形式也能正常读取""" + format_rule = {} + for key, val in rule.items(): + if not isinstance(val, list): + val = [val] + format_rule[key] = val + return format_rule + + def merge(self, extra_rule): + """合并函数, 将已有规则库与额外规则合并, 若无继承则已有规则库应为空""" + for key, val in extra_rule.items(): + for func, op_rules in val.items(): + try: + getattr(self, f"{func}")(key, op_rules) + except AttributeError: + logger.error("Undefined field and function name. 
Ensure that %s is correct in the rule " + "library.", func) + + def get_final_rules(self): + """获取最终的规则库""" + self._restore_rule() + return self._rule + + def add(self, key, add_rules: dict): + """新增函数, 新增已有规则库不存在的额外规则""" + if add_rules is None: + return + if self._tmp_rule.get(key) is None: + self._tmp_rule[key] = {} + format_add_rule = self._format_rule(add_rules) + for add_key, add_val in format_add_rule.items(): + logger.debug("add: %s: %s", add_key, add_val) + if add_key not in self._tmp_rule: + self._tmp_rule[key][add_key] = add_val + else: + logger.warning("This key has been written to the rule, " + "%s: %s should be written in the overwrite section", add_key, add_val) + self._tmp_rule[key][add_key].update(add_val) + + def overwrite(self, key, overwrite_rules: dict): + """重写函数, 重写已有规则库中已经存在的规则""" + if overwrite_rules is None: + return + if self._tmp_rule.get(key) is None: + self._tmp_rule[key] = {} + format_overwrite_rules = self._format_rule(overwrite_rules) + for overwrite_key, overwrite_val in format_overwrite_rules.items(): + logger.debug("overwrite: %s: %s", overwrite_key, overwrite_val) + if overwrite_key not in self._tmp_rule: + logger.warning("This key is not written to the rule. " + "%s: %s should be written in the add section", overwrite_key, overwrite_val) + self._tmp_rule[key][overwrite_key] = overwrite_val + else: + self._tmp_rule[key][overwrite_key].update(overwrite_val) + + def exclude(self, key, exclude_rules: list): + """除外函数, 将已有规则库已有的规则除外删除""" + if exclude_rules is None: + return + for exclude_key in exclude_rules: + logger.debug("exclude: %s", exclude_key) + if isinstance(exclude_key, str): + if exclude_key not in self._tmp_rule[key]: + logger.warning("This key is not written to the rule. 
" + "do not need to exclude: %s.", exclude_key) + continue + self._tmp_rule[key].pop(exclude_key) + else: + logger.warning("Error type rule in exclude: %s", exclude_key) + + def inherit_unique_id(self, key, inherit_unique_id): + """局部继承函数, 将规则库中指定unique_id版本覆盖指定位置""" + result_rule = self.timeline_op_rule_handler.get_tmp_timeline_op_rule_with_unique_id(inherit_unique_id) + if result_rule is not None and result_rule.get(key) is not None: + self._tmp_rule[key] = copy.deepcopy(result_rule.get(key)) + return + logger.error("Rule library version %s does not exist. ", inherit_unique_id) + + def _restore_rule(self): + for key, op_api_map in self._tmp_rule.items(): + self._rule[key] = [{op_combined: api} for op_combined, api in op_api_map.items()] diff --git a/profiler/advisor/common/timeline/fusion_ops_rule_handler.py b/profiler/advisor/common/timeline/fusion_ops_rule_handler.py new file mode 100644 index 0000000000..b0558cca6d --- /dev/null +++ b/profiler/advisor/common/timeline/fusion_ops_rule_handler.py @@ -0,0 +1,193 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. 
+import copy +import logging + +from profiler.advisor.common import constant +from profiler.advisor.common.timeline.fusion_ops_rule import OpRule +from profiler.advisor.utils.log import get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) + + +class TimelineOpRuleHandler: + """基于线性规划思想保存OpRule,用于局部继承、全局继承等功能""" + + def __init__(self): + self._db_content = None + # 具体生成的timeline规则,key为unique_id + self._all_tmp_timeline_op_rule = {} + # 所有timeline规则的dict集合,key为unique_id + self._all_origin_timeline_op_rule_dict = {} + # 已生成timeline规则的id数组 + self._exist_timeline_op_rule_unique_id_list = [] + + @staticmethod + def _get_local_inherit_id_list(op_rule: dict): + local_inherit_id_list = [] + for _, val in op_rule.items(): + if val.get("inherit_unique_id") is not None: + local_inherit_id_list.append(val.get("inherit_unique_id")) + return local_inherit_id_list + + @staticmethod + def _is_duplicated_element_in_lists(list_a, list_b): + """检查两个数组中是否存在重复的元素,若有任意元素重复,返回True""" + if not isinstance(list_a, list): + list_a = [list_a] + if not isinstance(list_b, list): + list_b = [list_b] + # 将两个数组合并为一个列表,使用集合(set)判断列表中是否存在重复元素 + combined_list = list_a + list_b + if len(combined_list) != len(set(combined_list)): + return True + return False + + def set_db_content(self, db_content): + # 过滤非 dict 格式, 或 dict 中没有定义 unique_id 的数据, 并保存到 _all_origin_timeline_op_rule_dict 中 + self._db_content = copy.deepcopy(db_content) + for rule_dic in self._db_content: + if not isinstance(rule_dic, dict) or rule_dic.get("unique_id") is None: + continue + self._all_origin_timeline_op_rule_dict[rule_dic.get("unique_id")] = rule_dic + if self._all_origin_timeline_op_rule_dict: + self.generate_all_timeline_op_rule() + + def generate_basic_timeline_op_rules(self): + """用于实现获取无全局继承规则, 无全局继承的规则认为是基础版本规则, 默认不会存在局部继承""" + for _, rule_dic in self._all_origin_timeline_op_rule_dict.items(): + if rule_dic.get("inherit_unique_id") is None: + self.add_basic_timeline_op_rule(rule_dic) + + def 
add_basic_timeline_op_rule(self, rule_dic): + # 若基础规则中存在局部继承的规则,则跳过 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + return + + temp_rule = OpRule() + temp_rule.merge(rule_dic.get("operator_rules")) + + unique_id = rule_dic.get("unique_id") + logger.debug("The rule of version %s is basic rule.", unique_id) + self.add_new_timeline_op_rule(unique_id, temp_rule.tmp_rule) + + def add_empty_timeline_op_rule(self, unique_id): + if self._all_origin_timeline_op_rule_dict.get(unique_id) is None: + self._all_origin_timeline_op_rule_dict[unique_id] = {} + tmp_rule = {} + logger.debug("The rule of version %s is empty.", unique_id) + self.add_new_timeline_op_rule(unique_id, tmp_rule) + + def add_new_timeline_op_rule(self, unique_id, tmp_rule): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + self._exist_timeline_op_rule_unique_id_list.append(unique_id) + self._all_tmp_timeline_op_rule[unique_id] = tmp_rule + logger.debug("The rule of version %s is successfully generated.", unique_id) + + def generate_specified_list_timeline_op_rule(self, specified_unique_id_list, kid_id_list=None): + for specified_unique_id in specified_unique_id_list: + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + self.generate_specified_timeline_op_rule(specified_unique_id, kid_id_list) + + def generate_specified_timeline_op_rule(self, specified_unique_id, kid_id_list=None): + """用于实现生成特定版本规则 + + 若不存在相应specified_unique_id的规则、或是已生成、循环继承等情况,将该规则置空并返回 + 规则库文件结构设置为多叉树, 结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成, + 直到specified_unique_id规则依赖继承的规则库全部生成完毕, 再生成该指定规则库, 将specified_unique_id的规则库归档 + + 参数: + specified_unique_id: 指定版本规则id + kid_id_list: 子规则id数组, 用于防止循环继承, 如间接继承自身或直接继承自身等情况 + 返回: + None + """ + if kid_id_list is None: + kid_id_list = [] + + # 若该unique_id规则在timeline_fusion_ops.yaml中没有相应的规则, 生成该id规则,置为空 + if self._all_origin_timeline_op_rule_dict.get(specified_unique_id) is None: + logger.warning("The 
specified version %s does not exist in the rule library. " + "Ensure that the corresponding rule is configured in the YAML file. " + "The version %s is left blank.", + specified_unique_id, + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + # 若该unique_id规则已经生成,则无需再次生成 + if specified_unique_id in self._exist_timeline_op_rule_unique_id_list: + logger.warning("The rule has been generated and does not need to be generated again. " + "Check whether unique id %s in the YAML file is duplicate.", + specified_unique_id) + return + + # 若kid_id_list不为空,且间接继承自身,则尝试生成空规则用于继承 + if kid_id_list and self._is_duplicated_element_in_lists(specified_unique_id, kid_id_list): + logger.warning("It cannot be inherited indirectly. Ensure that the corresponding rules are correctly " + "configured in the YAML file and leave Version %s blank.", + specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + return + + rule_dic = self._all_origin_timeline_op_rule_dict.get(specified_unique_id) + if rule_dic is not None: + kid_id_list.append(specified_unique_id) + + global_inherit_id = rule_dic.get("inherit_unique_id") + if global_inherit_id and global_inherit_id not in self._exist_timeline_op_rule_unique_id_list: + logger.debug("The rule of version %s global inherit the rule of version %s", + specified_unique_id, global_inherit_id) + self.generate_specified_timeline_op_rule(global_inherit_id, kid_id_list) + + # 若局部继承的规则未生成, 生成该规则 + local_inherit_id_list = self._get_local_inherit_id_list(rule_dic.get("operator_rules")) + if local_inherit_id_list: + logger.debug("The rule of version %s local inherit the rule of version %s", + specified_unique_id, local_inherit_id_list) + self.generate_specified_list_timeline_op_rule(specified_unique_id_list=local_inherit_id_list, + kid_id_list=kid_id_list) + logger.debug("Start to generate rule of version %s", specified_unique_id) + # 实现全局继承与局部继承 + temp_rule = OpRule(timeline_op_rule_handler=self, + 
rule=self._all_tmp_timeline_op_rule.get(global_inherit_id)) + temp_rule.merge(rule_dic.get("operator_rules")) + # 将生成的规则归档保存 + self.add_new_timeline_op_rule(specified_unique_id, temp_rule.tmp_rule) + return + logger.error("Failed to generate the rule whose unique_id is %s. Ensure that the rule is configured in " + "the YAML file and the version %s is empty.", specified_unique_id, specified_unique_id) + self.add_empty_timeline_op_rule(specified_unique_id) + + def generate_all_timeline_op_rule(self): + """用于实现获取所有版本规则 + + 查找db_content中的规则库, 规则库文件结构设置为多叉树, 优先生成无继承的基础规则版本 + 循环并生成其他版本, 文件结构决定了不断向下搜索最终应该是从基础版本开始继承, 递归生成,直到全部规则库生成后退出函数 + + 参数: + None + 返回: + None + """ + self.generate_basic_timeline_op_rules() + _unique_id_list = copy.deepcopy(list(self._all_origin_timeline_op_rule_dict.keys())) + for unique_id in _unique_id_list: + if unique_id in self._exist_timeline_op_rule_unique_id_list: + continue + self.generate_specified_timeline_op_rule(unique_id) + + def get_tmp_timeline_op_rule_with_unique_id(self, unique_id): + if unique_id not in self._exist_timeline_op_rule_unique_id_list: + logger.error("The specified unique_id does not exist in the rule library. Ensure that the " + "corresponding rule is configured in the YAML file and the version %s is empty." + "If the value of unique_id is a negative number, the version may not be supported.", + unique_id) + self.add_empty_timeline_op_rule(unique_id) + if unique_id < 0: + logger.error("Advise to use a positive integer as the unique id of rules. " + "Negative numbers: %s are not recommended to use as unique id. 
" + "If specified invalid unique id: %s is used, an empty rule is returned by default.", + unique_id, constant.TIMELINE_FUSION_OPS_INVALID_UNIQUE_ID) + return self._all_tmp_timeline_op_rule.get(unique_id) diff --git a/profiler/advisor/config/config.ini b/profiler/advisor/config/config.ini index b8f6703685..7185a6b3bf 100644 --- a/profiler/advisor/config/config.ini +++ b/profiler/advisor/config/config.ini @@ -9,8 +9,8 @@ tune_ops_file = operator_tuning_file.cfg [THRESHOLD] # operator_bound_ratio: (mte, cube, vector, scalar) ratio greater than this value will be checked in operator_bound_checker operator_bound_ratio = 0.8 -[RULE] +[RULE-BUCKET] # region : URL of different regions where can download rule yaml file -cn-north-9 = https://cnnorth9-modelarts-sdk.obs.cn-north-9.myhuaweicloud.com/modelarts/solution/ma_advisor_rules/ -cn-southwest-2 = https://cnsouthwest2-modelarts-sdk.obs.cn-southwest-2.myhuaweicloud.com/modelarts/solution/ma_advisor_rules/ -cn-north-7 = https://cnnorth7-modelarts-sdk.obs.cn-north-7.ulanqab.huawei.com/modelarts/solution/ma_advisor_rules/ \ No newline at end of file +cn-north-9 = cnnorth9-modelarts-sdk +cn-southwest-2 = cnsouthwest2-modelarts-sdk +cn-north-7 = cnnorth7-modelarts-sdk \ No newline at end of file diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index ee8b3563b7..4db50464ef 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -2,6 +2,7 @@ import logging import os +from profiler.advisor.dataset.dataset import Dataset from profiler.advisor.utils.utils import singleton from profiler.cluster_analyse.common_func.file_manager import FileManager from profiler.advisor.common import constant as const @@ -13,10 +14,10 @@ from profiler.advisor.dataset.cluster.cluster_step_trace_time_bean import Cluste logger = logging.getLogger() -class ClusterDataset: +class ClusterDataset(Dataset): - def 
__init__(self, collection_path, **kwargs) -> None: - self.collection_path = os.path.realpath(collection_path) + def __init__(self, collection_path, data: dict, **kwargs) -> None: + super().__init__(collection_path, data) if not self.is_cluster_analysis_output_exist(): self.cluster_analyze() @@ -64,14 +65,14 @@ class ClusterDataset: class ClusterStepTraceTimeDataSet(ClusterDataset): RANK = "rank" - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) + def __init__(self, collection_path: str, data: dict, kwargs: dict = None): self._step_dict = defaultdict() - self.parse() + super().__init__(collection_path, data) - def parse(self): + def _parse(self): step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) self._step_dict = self.formate_data(step_data) + return True def formate_data(self, step_data: list): step_dict = defaultdict(lambda: [0, 0, 0]) @@ -100,15 +101,14 @@ class ClusterCommunicationDataSet(ClusterDataset): SDMA = "SDMA" RDMA = "RDMA" - def __init__(self, collection_path: str, kwargs: dict = None): - super().__init__(collection_path) + def __init__(self, collection_path: str, data: dict, kwargs: dict = None): self.rank_bw_dict = defaultdict(lambda: { self.RDMA_TIME_MS: 0, self.RDMA_SIZE_MB: 0, self.SDMA_TIME_MS: 0, self.SDMA_SIZE_MB: 0, }) - self.parse() + super().__init__(collection_path, data) @staticmethod def compute_ratio(dividend: float, divisor: float): @@ -117,9 +117,10 @@ class ClusterCommunicationDataSet(ClusterDataset): else: return round(dividend / divisor, 4) - def parse(self): + def _parse(self): communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) self.process(communication_json) + return True def process(self, communication_json: dict): for comm_group, group_dict in communication_json.items(): diff --git a/profiler/advisor/dataset/dataset.py b/profiler/advisor/dataset/dataset.py new file mode 100644 index 0000000000..7f1e40a38b --- /dev/null +++ 
b/profiler/advisor/dataset/dataset.py @@ -0,0 +1,38 @@ +""" +dataset module +""" +import logging +import os + +from profiler.advisor.config.config import Config + +logger = logging.getLogger() + + +class Dataset: + """ + :param collection_path: dataSet absolute path + dataset base class + """ + + def __init__(self, collection_path, data=None) -> None: + if data is None: + data = {} + self.collection_path = os.path.abspath(os.path.join(Config().work_path, collection_path)) + logger.debug("init %s with %s", self.__class__.__name__, self.collection_path) + if self._parse(): + key = self.get_key() + if key not in data: + data[key] = [] + data[key].append(self) + + def _parse(self): + return None + + @classmethod + def get_key(cls): + """ + get key of dataset + :return: key + """ + return cls.__name__.rsplit('.', maxsplit=1)[-1] diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py index c1134a9784..9b4c102dff 100644 --- a/profiler/advisor/dataset/timeline_event_dataset.py +++ b/profiler/advisor/dataset/timeline_event_dataset.py @@ -2,6 +2,7 @@ import logging from typing import List import ijson +from profiler.advisor.dataset.dataset import Dataset from tqdm import tqdm from profiler.advisor.common import constant as const @@ -13,17 +14,17 @@ logger = logging.getLogger() @singleton -class TimelineEventDataset: +class TimelineEventDataset(Dataset): - def __init__(self, root_dir, **kwargs) -> None: + def __init__(self, collection_path, data: dict, **kwargs) -> None: self._ops_with_task_type = {} self._ops_with_stack = {} self._torch_to_npu = {} self._acl_to_npu = set() self._aten: List[str] = [] self._optimizer: List[str] = [] - self.timeline_dir = root_dir - self.timeline_data_list = get_file_path_from_directory(root_dir, lambda file: file.endswith("trace_view.json")) + self.timeline_dir = collection_path + self.timeline_data_list = get_file_path_from_directory(collection_path, lambda file: 
file.endswith("trace_view.json")) self.dataset_len = None self.analysis_mode = kwargs.get("analysis_mode") self.task_type = kwargs.get("task_type") @@ -34,13 +35,14 @@ class TimelineEventDataset: logger.info("Load fusion operators database for cann version '%s' and torch version '%s'", self.cann_version, self.torch_version) - self.parse() + super().__init__(collection_path, data) if self.analysis_mode in ["op_stack", "all"]: self._task_op_names = list(set([event_key.split("-")[0] for event_key in self._ops_with_task_type.keys()])) self._post_process() + @property def ops_with_stack(self): return self._ops_with_stack @@ -69,23 +71,15 @@ class TimelineEventDataset: def aten(self): return self._aten - @classmethod - def get_key(cls): - """ - get key of dataset - :return: key - """ - return cls.__module__.rsplit('.', maxsplit=1)[-1] - - def parse(self): + def _parse(self): if len(self.timeline_data_list) == 0: logger.warning("Please ensure trace_view.json in %s, skip timeline analysis.", self.timeline_dir) return False if len(self.timeline_data_list) > 1: - logger.warning("Please ensure only one trace_view.json in %s, skip timeline analysis.", self.timeline_dir) - return False + logger.warning("Please ensure only one trace_view.json in %s, there will analysis first timeline profiling data.", self.timeline_dir) + self.timeline_data_list = [self.timeline_data_list[0]] result = self.parse_data_with_generator(self._add_event) diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 19da350a02..156922f4d1 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,47 +1,44 @@ import os -from profiler.advisor.analyzer.scheduling.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer -from profiler.advisor.analyzer.overall.overall_analyzer import OverallSummaryAnalyzer -from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset +from 
profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.utils.utils import Timer -from profiler.advisor.result.result import OptimizeResult from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer class Interface: - supported_analysiser = { - "computing": [], - "scheduling": [TimelineFusionOpsAnalyzer], + supported_analyzer = { + "schedule": [TimelineFusionOpsAnalyzer], + "computation": [], "communication": [], "overall": [], "dataloader": [], "cluster": [SlowRankAnalyzer, SlowLinkAnalyzer] } - all_dimension = supported_analysiser.keys() + all_dimension = supported_analyzer.keys() def __init__(self, **kwargs): self.collection_path = os.path.realpath(kwargs.get("profiling_path")) @staticmethod - def get_analyzer(dimension, is_inference=False): - return Interface.supported_analysiser.get(dimension, []) + def get_analyzer(dimension): + return Interface.supported_analyzer.get(dimension, []) def get_result(self: any, dimension: str, render_html=False, **kwargs): """ :Param mode: affinity apis, ai cpu and so on. 
""" result_list = [] - analysiser_list = self.get_analyzer(dimension, kwargs.get("is_inference", False)) - for idx, clss in enumerate(analysiser_list): + analyzer_list = self.get_analyzer(dimension) + for idx, clss in enumerate(analyzer_list): if clss and callable(clss): - analysiser = clss(collection_path = self.collection_path, **kwargs) - result_list.append(analysiser.optimize()) - if render_html and idx == len(analysiser_list) - 1: - if hasattr(analysiser, "html_render"): - analysiser.html_render.render_html() - analysiser.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + analyzer = clss(collection_path = self.collection_path, **kwargs) + result_list.append(analyzer.optimize(**kwargs)) + if render_html and idx == len(analyzer_list) - 1: + if hasattr(analyzer, "html_render"): + analyzer.html_render.render_html() + analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') return result_list diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index d7837e1e40..1a4444f1ec 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -1,499 +1,549 @@ -import json -import logging -import multiprocessing as mp -import os -import queue -import re -import stat -import time -import traceback -import types -from functools import wraps -from typing import Any, Set - -import click -import requests -from requests.adapters import HTTPAdapter -from tqdm import tqdm - -from profiler.advisor.common import constant as const -from profiler.advisor.common.timeline.fusion_ops_db import FusionOperatorDB -from profiler.advisor.common.version_control import VersionControl -from profiler.advisor.utils.log import init_logger, get_log_level - -logger = logging.getLogger() -logger.setLevel(get_log_level()) -permission_warned: Set = set() - - -def ignore_warning(exception: Exception = None): - return exception - - -class ContextObject(object): - def __init__(self): - self._debug = False - - def 
set_debug(self, debug=False): - self._debug = debug - - @property - def debug_mode(self): - return self._debug - - -def debug_option(f): - return click.option('--debug', '-D', - is_flag=True, - expose_value=False, - is_eager=True, - callback=init_logger, - help="Debug Mode. Shows full stack trace when error occurs.")(f) - - -def singleton(cls): - """ - :param cls: any class - :return: singleton handle - """ - _instance = {} - - def _singleton(*args: any, **kw: any) -> any: - if cls not in _instance: - _instance[cls] = cls(*args, **kw) - return _instance.get(cls) - - return _singleton - - -def lazy_property(func): - """ - Lazy loading of class attributes. - which is calculated only once when it is called for the first time, - and will not be repeated for each call after that. - """ - attr_name = "_lazy_" + func.__name__ - - @property - def _lazy_property(instance): - if not hasattr(instance, attr_name): - setattr(instance, attr_name, func(instance)) - return getattr(instance, attr_name) - - return _lazy_property - - -class CheckPathAccess: - """ - check path access permissions - """ - - # pylint: disable=no-member - def __init__(self, func): - wraps(func)(self) - self.warned = permission_warned - - def __call__(self, *args, **kwargs): - path = args[0] - if not os.access(path, os.R_OK) and path not in self.warned: - logger.warning("%s can not read, check the permissions", path) - self.warned.add(path) - return self.__wrapped__(*args, **kwargs) - - def __get__(self, instance, cls): - if instance is None: - return self - return types.MethodType(self, instance) - - -def walk_error_handler(error): - """ - handle dir walk error - """ - if error.filename not in permission_warned: - logger.warning(error) - permission_warned.add(error.filename) - - -@CheckPathAccess -def get_file_path_from_directory(path: str, check_func: Any) -> list: - """ - get file from directory - """ - file_list = [] - for root, _, files in os.walk(path, onerror=walk_error_handler): - for filename in 
files: - filepath = os.path.join(root, filename) - if check_func(filename): - file_list.append(filepath) - return file_list - - -@singleton -class Timer: - def __init__(self): - self.strftime = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) - - -def get_analyze_processes(): - # n_processes not exposed to user through ma-advisor command arguments now - return min(int(os.getenv(const.MA_ADVISOR_ANALYZE_PROCESSES, 1)), const.MA_ADVISOR_MAX_PROCESSES) - - -def init_timeline_ops_db(cann_version=None, torch_version=None): - logger.debug("init operators database") - - return FusionOperatorDB(cann_version=cann_version, torch_version=torch_version) - - -def format_timeline_result(result: dict, dump_html=False): - """ - :Param result: json for api name and stack - :Return: json after format - """ - format_result = {} - if dump_html: - result = json.loads(json.dumps(result).replace("\\r\\n", "
").replace("", "<module>")) - - for key, stacks in result.items(): - api_name = key.split(":")[0] - format_result[api_name] = sorted(list(stacks.items()), key=lambda stack: stack[1], reverse=True) - return format_result - - -class ParallelJob: - - def __init__(self, src_func, ops_api_list, job_name=None): - if not callable(src_func): - raise TypeError(f"src_func should be callable") - - if not isinstance(ops_api_list, (list, tuple)): - raise TypeError(f"ops_api_list should be list or tuple") - - self.src_func = src_func - self.ops_api_list = ops_api_list - self.job_name = job_name - - def start(self, n_proccesses): - - queue = mp.Queue(len(self.ops_api_list)) - completed_queue = mp.Queue() - for i in range(len(self.ops_api_list)): - queue.put(i) - - processes = [] - listen = mp.Process(target=self.listener, args=(completed_queue, len(self.ops_api_list),)) - listen.start() - - for i in range(n_proccesses): - p = mp.Process(target=self.parallel_queue, args=(queue, completed_queue,)) - processes.append(p) - p.start() - - for p in processes: - p.join() - - completed_queue.put(None) - listen.join() - - def listener(self, completed_queue, num): - pbar = tqdm(total=num, position=0, leave=False, ncols=100, desc=self.job_name) - for _ in iter(completed_queue.get, None): - pbar.update() - pbar.refresh() - pbar.n = num - - def parallel_queue(self, job_queue, completed_queue): - while True: - try: - if job_queue.empty(): - break - token = job_queue.get(timeout=1) - except queue.Empty: - continue - self.src_func(*self.ops_api_list[token]) - completed_queue.put(token) - - -def mp_queue_to_list(job_queue): - queue_list = [] - while True: - try: - if job_queue.empty(): - break - token = job_queue.get(timeout=1) - queue_list.append(token) - except queue.Empty: - continue - return queue_list - - -def load_parameter(parameter, default): - if not os.environ.get(parameter, None): - return default - else: - return os.environ.get(parameter) - - -def get_supported_subclass(clazz: 
VersionControl.__class__, cann_version: str): - """ - Returns a list of subclasses that support the specified version - :param clazz: Class name which is extends to VersionControl.__class__ - :param cann_version: The CANN software version - :return: The list of subclasses that support the specified CANN version - """ - # 获取所有支持这个cann版本的子类 - dataset_classes = clazz.__subclasses__() - sub_class_list = [cls for cls in dataset_classes if cls.is_supported(cann_version)] - logger.debug("The support subclass list is %s, cann version is %s", str(sub_class_list), cann_version) - return sub_class_list - - -def to_percent(num: float) -> str: - """ - change float to percent format - """ - num = num * 100 - return f"{num:.2f}%" - - -def safe_division(numerator, denominator): - """Return 0 if denominator is 0.""" - return denominator and numerator / denominator - - -def safe_write(content, save_path): - if os.path.dirname(save_path) != "": - os.makedirs(os.path.dirname(save_path), exist_ok=True) - - with os.fdopen(os.open(save_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, - stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP), "w") as f: - f.write(content) - - -def create_directory_for_file(file: str) -> None: - """ - create directory for file - """ - dirname = os.path.dirname(file) - if not os.path.exists(dirname): - os.makedirs(dirname) - - -class CheckPathAccess: - """ - check path access permissions - """ - - # pylint: disable=no-member - def __init__(self, func): - wraps(func)(self) - self.warned = permission_warned - - def __call__(self, *args, **kwargs): - path = args[0] - if path and not os.access(path, os.R_OK) and path not in self.warned: - logger.warning("%s can not read, check the permissions", path) - self.warned.add(path) - return self.__wrapped__(*args, **kwargs) - - def __get__(self, instance, cls): - if instance is None: - return self - return types.MethodType(self, instance) - - -@CheckPathAccess -def get_file_path_from_directory(path, check_func): - """ - get file from 
directory - """ - file_list = [] - - if not path: - return file_list - - if not os.path.isdir(path): - logger.warning("Expected existed directory, but got %s", path) - - for root, _, files in os.walk(path): - for filename in files: - filepath = os.path.join(root, filename) - if check_func(filename): - file_list.append(filepath) - return file_list - - -@CheckPathAccess -def get_dir_path_from_directory(path: str, check_func: Any) -> list: - """ - get file from directory - """ - file_list = [] - for root, _, files in os.walk(path, onerror=walk_error_handler): - for filename in files: - filepath = os.path.join(root, filename) - if check_func(filename): - file_list.append(filepath) - return file_list - - -def is_regex_pattern(string: str): - """ - Check if str is a regular expression. - """ - escaped_string = re.escape(string) - return not (escaped_string == string) - - -def join_prof_path(root_dir: str, sub_dir: str) -> str: - """ - regular expression matching method for path concatenation - """ - if is_regex_pattern(sub_dir): - for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): - if re.match(sub_dir, os.path.basename(root)): - return root - else: - sub_dir = os.path.join(root_dir, sub_dir) - if os.path.exists(sub_dir): - return sub_dir - return "" - - -def format_excel_title(title: str) -> str: - """ - format excel title - """ - title = title.lower() - title = title.replace("(us)", '') - title = title.replace("(ns)", '') - title = title.replace("(%)", '') - title = title.replace(" ", "_") - return title - - -def format_float(num: float) -> float: - """ - format float num, round to 2 decimal places - """ - return round(num, 2) - - -class SafeOpen: - """ - safe open to check file - """ - - # pylint: disable=consider-using-with - def __init__(self, name, mode='r', encoding=None): - self.file = None - if not os.path.exists(name): - logger.warning("%s not exist, please check", name) - return - - if os.access(name, os.R_OK): - self.file = open(name, mode, 
encoding=encoding, errors="ignore") - else: - logger.warning("%s can not read, check the permissions", name) - - def __enter__(self): - return self.file - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.file: - self.file.close() - return True - - -def save_downloaded_file(response, url_path, file_save_path): - """保存响应体中的文件 - - 参数: - response: 请求后获取的响应体 - url_path: url路径 - file_save_path: 保存路径 - 返回: - final_file_path: 文件保存绝对路径 - """ - # 获取url路径中的文件名, 拼接在保存路径下 - file_save_path = os.path.normpath(file_save_path) - file_name = os.path.basename(url_path) - final_file_path = os.path.join(file_save_path, file_name) - # 若目标保存路径不存在,则自动生成 - if not os.path.exists(file_save_path): - os.makedirs(file_save_path) - if response.status_code <= 300: - logger.debug("Response status code is %s", response.status_code) - flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL - modes = stat.S_IWUSR | stat.S_IRUSR - # 若文件已存在,则移除已有的文件并保存最新的文件 - if os.path.exists(final_file_path): - os.remove(final_file_path) - # 保存文件 - with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f: - f.write(response.content) - logger.info("Success to save content in: %s", os.path.abspath(final_file_path)) - else: - # 若响应码不为预期的数值, 显示相应告警 - logger.warning("Failed to save the response body. The response status code is %s. 
" - "Please check the network or file URL", response.status_code) - - -def request_with_retry(url_path): - """使用requests请求获取文件, 失败则进行重试, 最多请求 max_retries+1 次 - - 参数: - url_path: URL路径 - file_save_path: 云文件保存路径 - """ - logger.debug("Requesting or retrying to get %s", url_path) - - # 若从环境变量指定了保存路径,优先从环境变量中获取,若为空则使用默认的云文件保存路径constant.CLOUD_RULE_PATH - file_save_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH) - if os.getenv(const.ADVISOR_RULE_PATH): - file_save_path = os.getenv(const.ADVISOR_RULE_PATH) - - session = requests.Session() - # 使用session发起的所有请求, 默认最多会重试 max_retries 次, 计入最初请求, 最差情况下请求 max_retries+1 次 - adapter = HTTPAdapter(max_retries=const.MAX_RETRIES) - session.mount('http://', adapter) - session.mount('https://', adapter) - - logger.debug('Session try to get response') - response = None - try: - response = session.get(url_path, timeout=const.TIMEOUT) - except Exception as e: - logger.debug("Error: %s: %s", e, traceback.format_exc()) - - if response is None: - logger.warning("Fail to download: %s, response is None, " - "please use the environment variable %s for more detailed information", - url_path, const.ADVISOR_LOG_LEVEL) - else: - try: - # 若响应码为400~600之间,response.raise_for_status抛出HTTPError错误, 跳过调用save_downloaded_file函数逻辑 - response.raise_for_status() - save_downloaded_file(response, url_path=url_path, file_save_path=file_save_path) - except Exception as e: - logger.warning("Error: %s: %s", e, traceback.format_exc()) - # 关闭 session, 清除所有装配器 - session.close() - - -def read_csv(file): - import csv - - raw_data = [] - logger.debug("Parse file %s", file) - with SafeOpen(file, encoding="utf-8") as csv_file: - try: - csv_content = csv.reader(csv_file) - for row in csv_content: - raw_data.append(row) - except OSError as error: - logger.error("Read csv file failed : %s", error) - return [] - - return raw_data +import inspect +import json +import logging +import multiprocessing as mp +import os +import queue +import re +import stat +import 
time +import traceback +import types +from functools import wraps +from typing import Any, Set + +import click +import requests +from requests.adapters import HTTPAdapter +from tqdm import tqdm + +from profiler.advisor.common import constant as const +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.utils.log import init_logger, get_log_level + +logger = logging.getLogger() +logger.setLevel(get_log_level()) +permission_warned: Set = set() + + +def ignore_warning(exception: Exception = None): + return exception + + +class ContextObject(object): + def __init__(self): + self._debug = False + + def set_debug(self, debug=False): + self._debug = debug + + @property + def debug_mode(self): + return self._debug + + +def debug_option(f): + return click.option('--debug', '-D', + is_flag=True, + expose_value=False, + is_eager=True, + callback=init_logger, + help="Debug Mode. Shows full stack trace when error occurs.")(f) + + +def get_class_absolute_path(cls): + module = inspect.getmodule(cls) + if module is not None: + module_path = module.__name__ + class_name = cls.__name__ + return f"{module_path}.{class_name}" + else: + return None + + +def is_static_func(function_obj): + return isinstance(function_obj, staticmethod) + + +def singleton(cls): + """ + :param cls: any class + :return: singleton handle + + When using the singleton function, you need to manually specify arg='dataSet_path'. Otherwise, the singleton function + is initialized by class name. + if cls has 'arg' property, _instance map will build by class_name and 'arg', the default value of + collection path is class absolute path. 
+ + _instance = {cls.name: {collection_path: instance}} + """ + _instance = {} + + def _singleton(*args: any, **kw: any) -> any: + collection_path = kw.get("collection_path") + if not collection_path: + collection_path = get_class_absolute_path(cls) + if cls in _instance and collection_path in _instance[cls]: + return _instance[cls].get(collection_path) + if cls not in _instance: + _instance[cls] = {collection_path: cls(*args, **kw)} + else: + _instance[cls][collection_path] = cls(*args, **kw) + return _instance[cls].get(collection_path) + + # 保留原始类的属性和方法 + _singleton.__name__ = cls.__name__ + _singleton.__module__ = cls.__module__ + _singleton.__doc__ = cls.__doc__ + + # 拷贝原始类的类方法和静态方法 + _singleton.__dict__.update(cls.__dict__) + for base_class in inspect.getmro(cls)[::-1]: + # 获取类的所有成员 + members = inspect.getmembers(base_class) + + # 过滤出函数对象 + function_objs = [member[1] for member in members if inspect.isfunction(member[1]) or inspect.ismethod(member[1])] + for function_obj in function_objs: + if inspect.isfunction(function_obj) and not is_static_func(function_obj): + continue + setattr(_singleton, function_obj.__name__, function_obj) + + return _singleton + + +def lazy_property(func): + """ + Lazy loading of class attributes. + which is calculated only once when it is called for the first time, + and will not be repeated for each call after that. 
+ """ + attr_name = "_lazy_" + func.__name__ + + @property + def _lazy_property(instance): + if not hasattr(instance, attr_name): + setattr(instance, attr_name, func(instance)) + return getattr(instance, attr_name) + + return _lazy_property + + +class CheckPathAccess: + """ + check path access permissions + """ + + # pylint: disable=no-member + def __init__(self, func): + wraps(func)(self) + self.warned = permission_warned + + def __call__(self, *args, **kwargs): + path = args[0] + if not os.access(path, os.R_OK) and path not in self.warned: + logger.warning("%s can not read, check the permissions", path) + self.warned.add(path) + return self.__wrapped__(*args, **kwargs) + + def __get__(self, instance, cls): + if instance is None: + return self + return types.MethodType(self, instance) + + +def walk_error_handler(error): + """ + handle dir walk error + """ + if error.filename not in permission_warned: + logger.warning(error) + permission_warned.add(error.filename) + + +@CheckPathAccess +def get_file_path_from_directory(path: str, check_func: Any) -> list: + """ + get file from directory + """ + file_list = [] + for root, _, files in os.walk(path, onerror=walk_error_handler): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +@singleton +class Timer: + def __init__(self): + self.strftime = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) + + +def get_analyze_processes(): + # n_processes not exposed to user through att-advisor command arguments now + return min(int(os.getenv(const.MA_ADVISOR_ANALYZE_PROCESSES, 1)), const.MA_ADVISOR_MAX_PROCESSES) + + +def format_timeline_result(result: dict, dump_html=False): + """ + :Param result: json for api name and stack + :Return: json after format + """ + format_result = {} + if dump_html: + result = json.loads(json.dumps(result).replace("\\r\\n", "
").replace("", "<module>")) + + for key, stacks in result.items(): + api_name = key.split(":")[0] + format_result[api_name] = sorted(list(stacks.items()), key=lambda stack: stack[1], reverse=True) + return format_result + + +class ParallelJob: + + def __init__(self, src_func, ops_api_list, job_name=None): + if not callable(src_func): + raise TypeError(f"src_func should be callable") + + if not isinstance(ops_api_list, (list, tuple)): + raise TypeError(f"ops_api_list should be list or tuple") + + self.src_func = src_func + self.ops_api_list = ops_api_list + self.job_name = job_name + + def start(self, n_proccesses): + + job_queue = mp.Queue(len(self.ops_api_list)) + completed_queue = mp.Queue() + for i in range(len(self.ops_api_list)): + job_queue.put(i) + + processes = [] + listen = mp.Process(target=self.listener, args=(completed_queue, len(self.ops_api_list),)) + listen.start() + + for i in range(n_proccesses): + p = mp.Process(target=self.parallel_queue, args=(job_queue, completed_queue,)) + processes.append(p) + p.start() + + for p in processes: + p.join() + + completed_queue.put(None) + listen.join() + + def listener(self, completed_queue, num): + pbar = tqdm(total=num, position=0, leave=False, ncols=100, desc=self.job_name) + for _ in iter(completed_queue.get, None): + pbar.update() + pbar.refresh() + pbar.n = num + + def parallel_queue(self, job_queue, completed_queue): + while True: + try: + if job_queue.empty(): + break + token = job_queue.get(timeout=1) + except queue.Empty: + continue + self.src_func(*self.ops_api_list[token]) + completed_queue.put(token) + + +def mp_queue_to_list(job_queue): + queue_list = [] + while True: + try: + if job_queue.empty(): + break + token = job_queue.get(timeout=1) + queue_list.append(token) + except queue.Empty: + continue + return queue_list + + +def load_parameter(parameter, default): + if not os.environ.get(parameter, None): + return default + else: + return os.environ.get(parameter) + + +def 
get_supported_subclass(clazz: VersionControl.__class__, cann_version: str): + """ + Returns a list of subclasses that support the specified version + :param clazz: Class name which is extends to VersionControl.__class__ + :param cann_version: The CANN software version + :return: The list of subclasses that support the specified CANN version + """ + # 获取所有支持这个cann版本的子类 + dataset_classes = clazz.__subclasses__() + sub_class_list = [cls for cls in dataset_classes if cls.is_supported(cann_version)] + logger.debug("The support subclass list is %s, cann version is %s", str(sub_class_list), cann_version) + return sub_class_list + + +def to_percent(num: float) -> str: + """ + change float to percent format + """ + num = num * 100 + return f"{num:.2f}%" + + +def safe_division(numerator, denominator): + """Return 0 if denominator is 0.""" + return denominator and numerator / denominator + + +def safe_write(content, save_path): + if os.path.dirname(save_path) != "": + os.makedirs(os.path.dirname(save_path), exist_ok=True) + + with os.fdopen(os.open(save_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, + stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP), "w") as f: + f.write(content) + + +def create_directory_for_file(file: str) -> None: + """ + create directory for file + """ + dirname = os.path.dirname(file) + if not os.path.exists(dirname): + os.makedirs(dirname) + + +class CheckPathAccess: + """ + check path access permissions + """ + + # pylint: disable=no-member + def __init__(self, func): + wraps(func)(self) + self.warned = permission_warned + + def __call__(self, *args, **kwargs): + path = args[0] + if path and not os.access(path, os.R_OK) and path not in self.warned: + logger.warning("%s can not read, check the permissions", path) + self.warned.add(path) + return self.__wrapped__(*args, **kwargs) + + def __get__(self, instance, cls): + if instance is None: + return self + return types.MethodType(self, instance) + + +@CheckPathAccess +def get_file_path_from_directory(path, 
check_func): + """ + get file from directory + """ + file_list = [] + + if not path: + return file_list + + if not os.path.isdir(path): + logger.warning("Expected existed directory, but got %s", path) + + for root, _, files in os.walk(path): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +@CheckPathAccess +def get_dir_path_from_directory(path: str, check_func: Any) -> list: + """ + get file from directory + """ + file_list = [] + for root, _, files in os.walk(path, onerror=walk_error_handler): + for filename in files: + filepath = os.path.join(root, filename) + if check_func(filename): + file_list.append(filepath) + return file_list + + +def is_regex_pattern(string: str): + """ + Check if str is a regular expression. + """ + escaped_string = re.escape(string) + return not (escaped_string == string) + + +def join_prof_path(root_dir: str, sub_dir: str) -> str: + """ + regular expression matching method for path concatenation + """ + if is_regex_pattern(sub_dir): + for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): + if re.match(sub_dir, os.path.basename(root)): + return root + else: + sub_dir = os.path.join(root_dir, sub_dir) + if os.path.exists(sub_dir): + return sub_dir + return "" + + +def format_excel_title(title: str) -> str: + """ + format excel title + """ + title = title.lower() + title = title.replace("(us)", '') + title = title.replace("(ns)", '') + title = title.replace("(%)", '') + title = title.replace(" ", "_") + return title + + +def format_float(num: float) -> float: + """ + format float num, round to 2 decimal places + """ + return round(num, 2) + + +class SafeOpen: + """ + safe open to check file + """ + + # pylint: disable=consider-using-with + def __init__(self, name, mode='r', encoding=None): + self.file = None + if not os.path.exists(name): + logger.warning("%s not exist, please check", name) + return + + if os.access(name, os.R_OK): 
+ self.file = open(name, mode, encoding=encoding, errors="ignore") + else: + logger.warning("%s can not read, check the permissions", name) + + def __enter__(self): + return self.file + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.file: + self.file.close() + return True + + +def save_downloaded_file(response, url_path, file_save_path): + """保存响应体中的文件 + + 参数: + response: 请求后获取的响应体 + url_path: url路径 + file_save_path: 保存路径 + 返回: + final_file_path: 文件保存绝对路径 + """ + # 获取url路径中的文件名, 拼接在保存路径下 + file_save_path = os.path.normpath(file_save_path) + file_name = os.path.basename(url_path) + final_file_path = os.path.join(file_save_path, file_name) + # 若目标保存路径不存在,则自动生成 + if not os.path.exists(file_save_path): + os.makedirs(file_save_path) + if response.status_code <= 300: + logger.debug("Response status code is %s", response.status_code) + flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL + modes = stat.S_IWUSR | stat.S_IRUSR + # 若文件已存在,则移除已有的文件并保存最新的文件 + if os.path.exists(final_file_path): + os.remove(final_file_path) + # 保存文件 + with os.fdopen(os.open(final_file_path, flags, modes), mode="wb") as f: + f.write(response.content) + logger.info("Success to save content in: %s", os.path.abspath(final_file_path)) + else: + # 若响应码不为预期的数值, 显示相应告警 + logger.warning("Failed to save the response body. The response status code is %s. 
" + "Please check the network or try another region", response.status_code) + + +def request_with_retry(url_path, region_name=None): + """使用requests请求获取文件, 失败则进行重试, 最多请求 max_retries+1 次 + + 参数: + url_path: URL路径 + file_save_path: 云文件保存路径 + """ + logger.debug("Requesting or retrying to get file from region: %s", region_name) + + # 若从环境变量指定了保存路径,优先从环境变量中获取,若为空则使用默认的云文件保存路径constant.CLOUD_RULE_PATH + file_save_path = os.path.join(os.path.expanduser("~"), const.CLOUD_RULE_PATH) + if os.getenv(const.ADVISOR_RULE_PATH): + file_save_path = os.getenv(const.ADVISOR_RULE_PATH) + + session = requests.Session() + # 使用session发起的所有请求, 默认最多会重试 max_retries 次, 计入最初请求, 最差情况下请求 max_retries+1 次 + adapter = HTTPAdapter(max_retries=const.MAX_RETRIES) + session.mount(const.HTTP_PREFIXES, adapter) + session.mount(const.HTTPS_PREFIXES, adapter) + + logger.debug('Session try to get response') + response = None + try: + response = session.get(url_path, timeout=const.TIMEOUT) + except Exception as e: + logger.debug("Error: %s: %s", e, traceback.format_exc()) + + if response is None: + logger.warning("Fail to download file from region: %s, response is None, " + "please use the environment variable %s for more detailed information", + region_name, const.ADVISOR_LOG_LEVEL) + else: + try: + # 若响应码为400~600之间,response.raise_for_status抛出HTTPError错误, 跳过调用save_downloaded_file函数逻辑 + response.raise_for_status() + save_downloaded_file(response, url_path=url_path, file_save_path=file_save_path) + except Exception as e: + logger.warning("Error: %s: %s", e, traceback.format_exc()) + # 关闭 session, 清除所有装配器 + session.close() + + +def read_csv(file): + import csv + + raw_data = [] + logger.debug("Parse file %s", file) + with SafeOpen(file, encoding="utf-8") as csv_file: + try: + csv_content = csv.reader(csv_file) + for row in csv_content: + raw_data.append(row) + except OSError as error: + logger.error("Read csv file failed : %s", error) + return [] + + return raw_data + + +def get_file_path_by_walk(root, 
filename): + file_path = "" + for root, _, files in os.walk(root, topdown=True): + for name in files: + if name == filename: + file_path = os.path.join(root, name) + return file_path + return file_path diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 2efecffcb7..95d8fa2f53 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -19,8 +19,8 @@ def _analyze(dimensions, **kwargs): job_list = [] for dimension in dimensions: - interface = Interface(**kwargs) - job_list.append((dimension, interface)) + interface = Interface(**kwargs) + job_list.append((dimension, interface)) for i, (dimension, interface) in enumerate(job_list[::-1]): result_list += interface.get_result(dimension, render_html=i == len(job_list) - 1) @@ -41,8 +41,9 @@ def analyze_cli(**kwargs): name="all", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') +@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path(), + help='Directory of benchmark profiling data, used for compare performance') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -52,7 +53,14 @@ def analyze_cli(**kwargs): type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--is_inference', is_flag=True) +# @click.option('--is_inference', is_flag=True, help="Enable performance analysis of inference task") +@click.option("-pt", + "--profiling_type", + metavar="", + 
default=constant.ASCEND_PYTORCH_PROFILER, + required=False, + type=click.Choice(constant.SUPPORTED_PROFILING_TYPE), + help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_all(**kwargs) -> None: # 当前compare_tools必须输入两个profiling路径,att-advisor有等价功能支持输入一个Profiling路径,后续替换成对应实现 @@ -66,8 +74,7 @@ def analyze_all(**kwargs) -> None: name="communication", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -77,19 +84,16 @@ def analyze_all(**kwargs) -> None: type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--mode', '-m', 'mode', default=None) -@click.option('--is_inference', is_flag=True) @debug_option def analyze_communication(**kwargs) -> None: _analyze(["communication"], **kwargs) @analyze_cli.command(context_settings=CONTEXT_SETTINGS, - name="scheduling", + name="schedule", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -99,19 +103,16 @@ def 
analyze_communication(**kwargs) -> None: type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--mode', '-m', 'mode', default=None) -@click.option('--is_inference', is_flag=True) @debug_option -def analyze_scheduling(**kwargs) -> None: - _analyze(["scheduling"], **kwargs) +def analyze_schedule(**kwargs) -> None: + _analyze(["schedule"], **kwargs) @analyze_cli.command(context_settings=CONTEXT_SETTINGS, - name="computing", + name="computation", short_help='Analyze timeline, operators and graph.') @click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='path of trace_view.json in profiling') -@click.option('--benchmark_profiling_path', '-bp', 'benchmark_profiling_path', type=click.Path()) + help='Directory of profiling data') @click.option('--cann_version', '-cv', 'cann_version', type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), default=constant.DEFAULT_CANN_VERSION, @@ -121,8 +122,33 @@ def analyze_scheduling(**kwargs) -> None: type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), default=constant.DEFAULT_TORCH_VERSION, help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--mode', '-m', 'mode', default=None) -@click.option('--is_inference', is_flag=True) +@click.option("-pt", + "--profiling_type", + metavar="", + default=constant.ASCEND_PYTORCH_PROFILER, + required=False, + type=click.Choice(constant.SUPPORTED_PROFILING_TYPE), + help="enter the profiling type, selectable range ascend_pytorch_profiler, mslite ,msprof") @debug_option -def analyze_computing(**kwargs) -> None: - _analyze(["computing"], **kwargs) +def analyze_computation(**kwargs) -> None: + _analyze(["computation"], **kwargs) + + +@analyze_cli.command(context_settings=CONTEXT_SETTINGS, + 
name="dataloader", + short_help='Analyze timeline, operators and graph.') +@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, + help='Directory of profiling data') +@click.option('--cann_version', '-cv', 'cann_version', + type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), + default=constant.DEFAULT_CANN_VERSION, + help='The CANN software version, which can be viewed by executing the following command: ' + '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') +@click.option('--torch_version', '-tv', 'torch_version', + type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), + default=constant.DEFAULT_TORCH_VERSION, + help='The runtime torch version, which can be detected by exec command "pip show torch"') +@click.option('--is_inference', is_flag=True, help="Enable performance analysis of inference task") +@debug_option +def analyze_dataloader(**kwargs) -> None: + _analyze(["dataloader"], **kwargs) diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index b14d3dfd86..d9b5b10da7 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -53,7 +53,7 @@ advisor_cli.add_command(compare_cli, name="compare") if __name__ == '__main__': advisor_cli.main( - ["analyze", "scheduling", "-d", + ["analyze", "schedule", "-d", r"/home/ma-user/work/profiling", ] ) diff --git a/profiler/cli/update_cli.py b/profiler/cli/update_cli.py new file mode 100644 index 0000000000..9407981ae0 --- /dev/null +++ b/profiler/cli/update_cli.py @@ -0,0 +1,40 @@ +from urllib import parse + +import click + +from profiler.advisor.common import constant +from profiler.advisor.config.config import Config +from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup +from profiler.advisor.utils.utils import debug_option, request_with_retry + + +@click.group(name="update", cls=ClickAliasedGroup) +def update_cli(**kwargs): + """Update operation 
command, such as update rule and specify save path.""" + pass + + +@update_cli.command(context_settings=CONTEXT_SETTINGS, + name="rule", + short_help='Update the ma-advisor rules on the terminal. The default save path is ' + '"~/rules/cloud/". If user want to specify the save path, please use the environment ' + 'variable "ADVISOR_RULE_PATH"') +@click.option('--region', '-r', type=click.Choice(constant.CLOUD_RULE_REGION_LIST), required=True, + default=constant.DEFAULT_CLOUD_RULE_REGION, + help='Specifies the region where the rule file is downloaded.') +@debug_option +def update_rule(**kwargs) -> None: + """ + Download the latest rule yaml file. + """ + region_name = kwargs.get("region") + rule_bucket = Config().config.get(constant.RULE_BUCKET, region_name) + rule_endpoint_suffix = constant.COMMON_ENDPOINT_SUFFIX.format(region_name) + if region_name in constant.INNER_REGION_LIST: + rule_endpoint_suffix = constant.INNER_ENDPOINT_SUFFIX.format(region_name) + + obs_url = constant.HTTPS_PREFIXES + rule_bucket + "." 
+ rule_endpoint_suffix + obs_url = parse.urljoin(obs_url, constant.COMMON_YAML_DIR) + for file_name in constant.CLOUD_YAML_NAME_LIST: + url = parse.urljoin(obs_url, file_name) + request_with_retry(url, region_name) diff --git a/requirements/test.txt b/requirements/tests.txt similarity index 95% rename from requirements/test.txt rename to requirements/tests.txt index 3bacb7ca55..bab89704aa 100644 --- a/requirements/test.txt +++ b/requirements/tests.txt @@ -1,5 +1,5 @@ -pytest==6.2.4 -pytest-cov==2.12.0 -pytest-mock==3.6.1 -pytest-cookies==0.6.1 +pytest==6.2.4 +pytest-cov==2.12.0 +pytest-mock==3.6.1 +pytest-cookies==0.6.1 mock==4.0.3 \ No newline at end of file diff --git a/version.txt b/version.txt index 7bcd0e3612..9f8e9b69a3 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.0.2 \ No newline at end of file +1.0 \ No newline at end of file -- Gitee From c7bb1aa18ec9eb77c3018f273692950f2f9ba4e2 Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Tue, 14 May 2024 15:13:06 +0800 Subject: [PATCH 10/21] =?UTF-8?q?advisor=20interface=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=9B=9B=E7=BA=A7=E5=91=BD=E4=BB=A4=E5=8F=82=E6=95=B0=EF=BC=8C?= =?UTF-8?q?analyzer=5Fcli=E5=85=A5=E5=8F=A3=E5=A4=84=E8=AF=86=E5=88=AB?= =?UTF-8?q?=E6=98=AF=E5=90=A6=E6=98=AF=E9=9B=86=E7=BE=A4=E5=9C=BA=E6=99=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fusion_ops/fusion_ops_analyzer.py | 9 +-- profiler/advisor/common/analyzer_scopes.py | 7 +++ profiler/advisor/interface/interface.py | 57 ++++++++++++------- profiler/advisor/result/result.py | 4 ++ profiler/cli/analyze_cli.py | 25 ++++++-- 5 files changed, 72 insertions(+), 30 deletions(-) create mode 100644 profiler/advisor/common/analyzer_scopes.py diff --git a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py index 4259db093b..01613dbe32 100644 --- 
a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py +++ b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py @@ -6,6 +6,7 @@ from tqdm import tqdm from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.advisor.common import constant as const +from profiler.advisor.common.analyzer_scopes import SupportedScopes from profiler.advisor.common.timeline.event import TimelineEvent from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset from profiler.advisor.result.item import OptimizeItem, OptimizeRecord @@ -155,7 +156,7 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer): ) optimization_item = OptimizeItem( - const.AFFINITY_TRAINING_API, + SupportedScopes.TIMELINE_FUSION_OPS, desc, [suggestion] ) @@ -163,16 +164,16 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer): self.result.add(OptimizeRecord(optimization_item)) record_title = ["Affinity API", "Code stacks", "Stack called counts"] - self.result.add_detail(const.AFFINITY_TRAINING_API, headers=record_title) + self.result.add_detail(SupportedScopes.TIMELINE_FUSION_OPS, headers=record_title) for api_name, stacks_info in format_timeline_result(self.matched_op_stacks).items(): if not stacks_info: detail = [api_name, "null", "null"] - self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) + self.result.add_detail(SupportedScopes.TIMELINE_FUSION_OPS, detail=detail) else: for stack in stacks_info: detail = [api_name, *stack] - self.result.add_detail(const.AFFINITY_TRAINING_API, detail=detail) + self.result.add_detail(SupportedScopes.TIMELINE_FUSION_OPS, detail=detail) def make_render(self): format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True) diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py new file mode 100644 index 0000000000..9a83adeb6d --- /dev/null +++ b/profiler/advisor/common/analyzer_scopes.py @@ -0,0 +1,7 @@ +class SupportedScopes: + + 
# used for specify fourth-level commands and define the key of the result dict + # the key defined bellow must be the same as value + TIMELINE_FUSION_OPS = "timeline_fusion_ops" + SLOW_RANK = "slow_rank" + SLOW_LINK = "slow_link" diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 156922f4d1..c920ad4ea2 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,6 +1,8 @@ +from collections import OrderedDict import os from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer +from profiler.advisor.common.analyzer_scopes import SupportedScopes from profiler.advisor.utils.utils import Timer from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer @@ -8,38 +10,51 @@ from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyze class Interface: supported_analyzer = { - "schedule": [TimelineFusionOpsAnalyzer], - "computation": [], - "communication": [], - "overall": [], - "dataloader": [], - "cluster": [SlowRankAnalyzer, SlowLinkAnalyzer] + "schedule": OrderedDict({ + SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer + }), + "computation": OrderedDict(), + "communication": OrderedDict(), + "overall": OrderedDict(), + "dataloader": OrderedDict(), + "cluster": OrderedDict({ + SupportedScopes.SKOW_RANK: SlowRankAnalyzer, + SupportedScopes.SLOW_LINK: SlowLinkAnalyzer + }) } - all_dimension = supported_analyzer.keys() + all_dimension = list(supported_analyzer.keys()) def __init__(self, **kwargs): self.collection_path = os.path.realpath(kwargs.get("profiling_path")) @staticmethod - def get_analyzer(dimension): - return Interface.supported_analyzer.get(dimension, []) + def get_scope(dimension): + return list(Interface.supported_analyzer.get(dimension).keys()) - def get_result(self: any, dimension: str, 
render_html=False, **kwargs): + @staticmethod + def get_analyzer(dimension, scope): + return Interface.supported_analyzer.get(dimension).get(scope) + + def get_result(self: any, dimension: str, scope: str, render_html=False, output_dict=True, **kwargs): """ :Param mode: affinity apis, ai cpu and so on. """ - result_list = [] - analyzer_list = self.get_analyzer(dimension) - for idx, clss in enumerate(analyzer_list): - if clss and callable(clss): - analyzer = clss(collection_path = self.collection_path, **kwargs) - result_list.append(analyzer.optimize(**kwargs)) - if render_html and idx == len(analyzer_list) - 1: - if hasattr(analyzer, "html_render"): - analyzer.html_render.render_html() - analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') - return result_list + if dimension not in self.all_dimension: + raise ValueError(f"Error dimension {dimension}, supported dimensions are {self.all_dimension}") + + supported_scopes = self.get_scope(dimension) + if scope not in supported_scopes: + raise ValueError(f"Error scope {scope}, supported scopes are {supported_scopes}") + + analyzer = self.get_analyzer(dimension, scope)(collection_path=self.collection_path, **kwargs) + result = analyzer.optimize(**kwargs) + + if render_html: + if hasattr(analyzer, "html_render"): + analyzer.html_render.render_html() + analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') + return result if not output_dict else result.data.get(getattr(SupportedScopes, scope.upper())) if __name__ == "__main__": diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index 308db61231..30b8f5795c 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -102,6 +102,10 @@ class OptimizeResult: self.page_dict = False self._tune_op_list = [] + @property + def data(self): + return self.sheet_recorder.sheet_data + def add_tune_op_list(self, tune_op_list) -> None: """ add tune op name to tune op list diff --git 
a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 95d8fa2f53..86af711564 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -10,6 +10,7 @@ from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup from profiler.advisor.common import constant from profiler.advisor.utils.utils import debug_option from profiler.advisor.interface.interface import Interface +from profiler.cluster_analyse.cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor logger = logging.getLogger() @@ -18,12 +19,26 @@ def _analyze(dimensions, **kwargs): result_list = [] job_list = [] - for dimension in dimensions: - interface = Interface(**kwargs) - job_list.append((dimension, interface)) + def is_cluster(): + profiling_path = kwargs.get("profiling_path") + path_list = [os.path.join(profiling_path, dir_name) for dir_name in os.listdir(profiling_path)] + dir_list = [path for path in path_list if os.path.isdir(path)] + data_processor = PytorchDataPreprocessor(dir_list) + data_map = data_processor.get_data_map() + return len(data_map) > 1 + + is_cluster = is_cluster() - for i, (dimension, interface) in enumerate(job_list[::-1]): - result_list += interface.get_result(dimension, render_html=i == len(job_list) - 1) + for dimension in dimensions: + if not is_cluster and dimension == "cluster": + continue + for scope in Interface.get_scope(dimension): + interface = Interface(**kwargs) + job_list.append((dimension, scope, interface)) + + for i, (dimension, scope, interface) in enumerate(job_list[::-1]): + result_list.append( + interface.get_result(dimension, scope, render_html=i == len(job_list) - 1, output_dict=False)) for result in result_list[::-1]: if result and hasattr(result, "show"): -- Gitee From e8b54caada474a536371f9a7be72c924bcb1947b Mon Sep 17 00:00:00 2001 From: personalc Date: Wed, 15 May 2024 16:49:56 +0800 Subject: [PATCH 11/21] support profiling operator analysis --- .gitignore | 1 + MANIFEST.in | 4 +- 
profiler/advisor/analyzer/base_analyzer.py | 8 +- .../computation/aicpu/aicpu_checker.py | 278 +++++++++++++++++ .../computation/bound/block_dim_checker.py | 77 +++++ .../bound/operator_bound_checker.py | 56 ++++ .../op_compile/dynamic_shape_checker.py | 82 +++++ .../analyzer/computation/operator_checker.py | 282 ++++++++++++++++++ .../computation/profiling_analyzer.py | 71 +++++ .../fusion_ops/timeline_api_stack_checker.py | 163 ++++++++++ profiler/advisor/common/analyzer_scopes.py | 1 + profiler/advisor/common/profiling/__init__.py | 0 profiler/advisor/common/profiling/ge_info.py | 47 +++ profiler/advisor/common/profiling/msprof.py | 144 +++++++++ .../advisor/common/profiling/op_summary.py | 76 +++++ profiler/advisor/common/profiling/tasktime.py | 75 +++++ .../config/profiling_data_version_config.yaml | 80 +++++ .../dataset/cluster/cluster_dataset.py | 6 +- .../advisor/dataset/profiling/__init__.py | 0 .../advisor/dataset/profiling/builder_base.py | 39 +++ .../advisor/dataset/profiling/db_manager.py | 70 +++++ .../advisor/dataset/profiling/device_info.py | 61 ++++ .../dataset/profiling/info_collection.py | 270 +++++++++++++++++ .../dataset/profiling/profiling_dataset.py | 76 +++++ .../dataset/profiling/profiling_parser.py | 132 ++++++++ .../advisor/display/html/templates/main.html | 8 +- .../html/templates/operator_ai_cpu.html | 61 ++++ .../html/templates/operator_block_dim.html | 38 +++ .../templates/operator_dynamic_shape.html | 15 + .../html/templates/operator_no_bound.html | 38 +++ profiler/advisor/interface/interface.py | 7 +- profiler/advisor/rules/aicpu_rules.yaml | 107 +++++++ profiler/advisor/utils/utils.py | 7 +- profiler/cli/entrance.py | 4 +- profiler/test/tools/__init__.py | 0 profiler/test/tools/tool.py | 38 +++ .../test/ut/advisor/profiling/__init__.py | 0 .../profiling/test_profiling_analyzer.py | 42 +++ .../profiling/test_profiling_dataset.py | 46 +++ profiler/test/ut/advisor/test_utils.py | 49 +++ setup.py | 6 +- 41 files changed, 2544 
insertions(+), 21 deletions(-) create mode 100644 profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py create mode 100644 profiler/advisor/analyzer/computation/bound/block_dim_checker.py create mode 100644 profiler/advisor/analyzer/computation/bound/operator_bound_checker.py create mode 100644 profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py create mode 100644 profiler/advisor/analyzer/computation/operator_checker.py create mode 100644 profiler/advisor/analyzer/computation/profiling_analyzer.py create mode 100644 profiler/advisor/analyzer/schedule/fusion_ops/timeline_api_stack_checker.py create mode 100644 profiler/advisor/common/profiling/__init__.py create mode 100644 profiler/advisor/common/profiling/ge_info.py create mode 100644 profiler/advisor/common/profiling/msprof.py create mode 100644 profiler/advisor/common/profiling/op_summary.py create mode 100644 profiler/advisor/common/profiling/tasktime.py create mode 100644 profiler/advisor/config/profiling_data_version_config.yaml create mode 100644 profiler/advisor/dataset/profiling/__init__.py create mode 100644 profiler/advisor/dataset/profiling/builder_base.py create mode 100644 profiler/advisor/dataset/profiling/db_manager.py create mode 100644 profiler/advisor/dataset/profiling/device_info.py create mode 100644 profiler/advisor/dataset/profiling/info_collection.py create mode 100644 profiler/advisor/dataset/profiling/profiling_dataset.py create mode 100644 profiler/advisor/dataset/profiling/profiling_parser.py create mode 100644 profiler/advisor/display/html/templates/operator_ai_cpu.html create mode 100644 profiler/advisor/display/html/templates/operator_block_dim.html create mode 100644 profiler/advisor/display/html/templates/operator_dynamic_shape.html create mode 100644 profiler/advisor/display/html/templates/operator_no_bound.html create mode 100644 profiler/advisor/rules/aicpu_rules.yaml create mode 100644 profiler/test/tools/__init__.py create mode 100644 
profiler/test/tools/tool.py create mode 100644 profiler/test/ut/advisor/profiling/__init__.py create mode 100644 profiler/test/ut/advisor/profiling/test_profiling_analyzer.py create mode 100644 profiler/test/ut/advisor/profiling/test_profiling_dataset.py create mode 100644 profiler/test/ut/advisor/test_utils.py diff --git a/.gitignore b/.gitignore index 36aacc7241..7e605b88a3 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +.vscode # PyInstaller # Usually these files are written by a python script from a template diff --git a/MANIFEST.in b/MANIFEST.in index d86534656d..cfadbde1db 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,3 @@ -recursive-include profiler/advisor/display * -recursive-include profiler/advisor/third_party/simulation/display * -recursive-include profiler/advisor/checker * +recursive-include profiler/ * global-exclude */__pycache__/* global-exclude *.pyc diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index 6f9438ba75..160f05c464 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -50,7 +50,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): return None logger.info("Enable analysis %s with %s", self.__class__.__name__, ",".join(data_list)) - return func(self, data) + return func(self) return wrapper @@ -76,7 +76,11 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): for dataset_cls in dataset_cls_list: if dataset_cls and callable(dataset_cls): - dataset_cls(collection_path=self.collection_path, data=self.dataset_list, **self.kwargs) + dataset = dataset_cls(collection_path=self.collection_path, data=self.dataset_list, **self.kwargs) + key = dataset_cls.get_key() + if key not in self.dataset_list: + self.dataset_list[key] = [] + self.dataset_list[key].append(dataset) @staticmethod def get_first_data_by_key(data, key) -> Union[Dataset, None]: diff --git 
a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py new file mode 100644 index 0000000000..4654d97225 --- /dev/null +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -0,0 +1,278 @@ +import copy +import os +from functools import partial +from typing import List, Dict, Optional + +import yaml +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker, logger +from profiler.advisor.analyzer.schedule.fusion_ops.timeline_api_stack_checker import OpStackFinder +from profiler.advisor.common import constant +from profiler.advisor.dataset.dataset import Dataset +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.dataset.timeline_event_dataset import TimelineEventDataset + + +class AicpuChecker(OperatorChecker): + _CHECKER = "aicpu operator" + _PROBLEM = "AICPU operator" + _MIN_TASK_DURATION = 20 + _description = f"Some operators and task duration exceed {_MIN_TASK_DURATION} us, such as :\n" + _SUGGESTION: List[str] = ["Modify code to avoid aicpu operator"] + STACK_INFO_ITEMS = "stack_info" + SUGGESTION_INFO_ITEMS = "suggestions" + _ITEMS = [ + "op_name", "op_type", "task_duration", "input_shapes", "input_data_types", "input_formats", "output_shapes", + "output_data_types", "output_formats" + ] + + def __init__(self, cann_version): + super(AicpuChecker, self).__init__(cann_version=cann_version) + self.aicpu_rules: Dict = {} + self.aicpu_checker: Dict = {} + self.load_aicpu_rules() + + def _check_data(self, profiling_data: ProfilingDataset) -> bool: + if not self._check_summary(profiling_data): + return False + return True + + def _check_operator(self, op_info) -> bool: + return op_info.task_type == constant.AI_CPU + + def load_aicpu_rules(self, rule_path="rules/aicpu_rules.yaml") -> Dict: + if not os.path.isabs(rule_path): + rule_path = os.path.join(os.path.dirname(__file__), + "../../computation/", 
"../", rule_path) + + if not os.path.exists(rule_path): + logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) + return {} + with open(rule_path, 'r') as f: + self.aicpu_rules = yaml.safe_load(f) + self.filter_aicpu_rules(self.aicpu_rules) + for checker_name, check_rule in self.aicpu_rules.items(): + if not isinstance(check_rule, (list, dict,)): + continue + + if checker_name not in AICPU_CHECKER.keys(): + logger.warning("Skip %s, which is not support now.", checker_name) + continue + + self.aicpu_checker[checker_name] = AICPU_CHECKER[checker_name](check_rule) + + def filter_aicpu_rules(self, aicpu_rules): + support_checkers = [] + for checkers in aicpu_rules['CommonChecker']: + for key, value in checkers.items(): + if key == 'DataTypeChecker' and value['cann_version'] != self.cann_verson: continue + support_checkers.append(checkers) + aicpu_rules['CommonChecker'] = support_checkers + return + + def check_aicpu_attr(self, op_info) -> List[str]: + suggestions = [] + for _, checker in self.aicpu_checker.items(): + suggestions.extend(checker.check(op_info)) + return suggestions + + def check(self, profiling_data: ProfilingDataset) -> bool: + """ + check if any operator need optimize + :param profiling_data: profiling datasest + :return: true or false + """ + + if not self._check_data(profiling_data): + return False + op_summary = profiling_data.op_summary + + def get_opeartor_stack_info(api_stack_finder: OpStackFinder, op_name_list: list) -> list: + data: Dict[str, Dataset] = {} + event_dataset = TimelineEventDataset(collection_path=profiling_data.collection_path, data=data, task_type=constant.AI_CPU) + + # disable multiprocessing, avoid cost time of enable new process for light task + api_stack_finder.get_api_stack_by_op(event_dataset, op_name_list, constant.AI_CPU, + disable_multiprocess=True) + return api_stack_finder._stack_record + + self._op_list = [] + total_task_duration = 0.0 + max_task_duration = 0.0 + for op_info in 
op_summary.op_list: + if self._check_operator(op_info): + self._op_list.append(op_info) + + task_duration = float(op_info.task_duration) + total_task_duration += task_duration + max_task_duration = max(max_task_duration, task_duration) + if (not self._op_list) or (max_task_duration < self._MIN_TASK_DURATION): + return False + + # 获取所有算子堆栈的信息 + op_name_list = [] + for op in self._op_list: + if op.op_name not in op_name_list: + op_name_list.append(op.op_name) + api_stack_finder = OpStackFinder() + stack_record = get_opeartor_stack_info(api_stack_finder, op_name_list) + + # task_id 到 stack 信息的对应 + self._op_list.sort(key=lambda x: int(x.task_id)) + stack_record.sort(key=lambda x: x[0]) + task_id_to_stack = dict() + for stack in stack_record: + task_id_to_stack[stack[0]] = stack[-1] + + # 算子追加堆栈属性 + for op in self._op_list: + stack = task_id_to_stack.get(int(op.task_id)) + op.add_attr(self.STACK_INFO_ITEMS, stack) + suggestions = self.check_aicpu_attr(op) + op.add_attr(self.SUGGESTION_INFO_ITEMS, suggestions) + + # double 类型算子判断 + double_type_ai_cpu_operator = [] + for op in self._op_list: + if not op.has_attr("input_data_types"): + logger.warning( + "Skip checking of input data in AICPU checker because of not containing input_data_dtypes in op summary") + break + if op.has_attr( + "input_data_types") and "DOUBLE" in op.input_data_types and op.op_name not in double_type_ai_cpu_operator: + double_type_ai_cpu_operator.append(op.op_name) + if bool(double_type_ai_cpu_operator): + self._SUGGESTION.append("Try to convert double type operator to float, such as {}".format( + ",".join(double_type_ai_cpu_operator))) + return True + + def make_render(self, html_render, record): + html_render.render_template(key="operator", + template_dir="templates", + template_name="operator_ai_cpu.html", + format_result=self.format_operator_result(record, constant.OPERATOR_LIST_UNLIMIT)) + + def format_operator_result(self, record, limit): + """ + Format operator result to html + :param record: 
profiling check record + :param limit: Limit number of operator statistics lists. + :return: + """ + optimization_item = record.optimization_item + release_suggestion_list = [] + for suggestion in optimization_item.suggestion: + release_suggestion_list.append(suggestion.replace('\n', '
')) + logger.debug("suggestion list is %s", release_suggestion_list) + format_result = {"record": record.__dict__, "suggestion": '
'.join(release_suggestion_list), + "task_duration": round(record.statistics_item.task_duration, 2)} + + statistic = self.group_by(copy.deepcopy(self._op_list), op_key='op_type', + limit=limit) + format_result["statistic"] = statistic + stack_key_list = ["stack_info", "input_data_types", "output_data_types"] + if statistic: + for key, info in statistic: + op_info_list = self.group_by_list(info.get("op_info_list"), stack_key_list, limit) + info["op_info_list"] = op_info_list + return format_result + + def group_by_list(self, op_list, op_key_list: List = ["stack_info", "input_data_types", "output_data_types"], + limit: int = constant.OPERATOR_LIST_UNLIMIT): + if op_list is None: + op_list = [] + + # op_key_list 合并添加合并的属性,作为 groupby 的 key value + op_key = '+'.join(op_key_list) # str, json + for op_info in op_list: + attribute = "" + for _op in op_key_list: + if op_info.get_attr(_op): + attribute += op_info.get_attr(_op) + op_info.add_attr(op_key, attribute) + + return self.group_by(op_list, op_key=op_key, limit=limit) + + +class BaserChecker: + def __init__(self, *args, **kwargs): + self.checker_list = [] + + def build(self): + raise NotImplementedError + + def check(self, op_info) -> List[str]: + suggestions = [] + for checker in self.checker_list: + suggestion = checker(op_info) + if suggestion is not None: + suggestions.append(suggestion) + return suggestions + + +class CommonChecker(BaserChecker): + def __init__(self, check_rules: List[Dict] = None): + super(CommonChecker, self).__init__() + self.check_rules = check_rules if check_rules is not None else [] + self.supported_checker = dict(DataTypeChecker=self.datatype_checker) + self.build() + + @staticmethod + def datatype_checker(check_item: Dict, op_info) -> Optional[str]: + supported_op_type = check_item.get('op_type', []) + suggestion = check_item.get('suggestion', "") + valid_inputs = check_item.get('input', []) + valid_outputs = check_item.get('output', []) + ignore_type = check_item.get('ignore_type', []) + 
op_type = getattr(op_info, 'op_type', "UNKNOWN") + if "__ALL__" in supported_op_type or \ + op_type.lower() in supported_op_type: + if op_type.lower() in ignore_type: + return None + + op_input_dtype = getattr(op_info, 'input_data_types', "").split(";") + op_input_dtype = [item.lower() for item in op_input_dtype] + op_output_dtype = getattr(op_info, 'output_data_types', "").split(";") + op_output_dtype = [item.lower() for item in op_output_dtype] + input_dtype_diff = set(op_input_dtype).difference(set(valid_inputs)) + output_dtype_diff = set(op_output_dtype).difference(set(valid_outputs)) + unsupported_dtype_diff = input_dtype_diff.union(output_dtype_diff) + if not unsupported_dtype_diff: + return None + + return suggestion.format(",".join(unsupported_dtype_diff).upper(), + op_type, + ",".join(valid_inputs).upper()) + + def build(self): + for check in self.check_rules: + (check_func, check_rule), = check.items() + if check_func not in self.supported_checker: + logger.warning("Skip %s, which has not been implemented.", check_func) + continue + self.checker_list.append(partial(self.supported_checker.get(check_func), check_rule)) + + +class ExampleGuideChecker(BaserChecker): + def __init__(self, check_rules: List[Dict] = None): + super(ExampleGuideChecker, self).__init__() + self.check_rules = check_rules if check_rules is not None else [] + self.build() + + def build(self): + def _guide_url(check_item: Dict, op_info) -> Optional[str]: + supported_op_type = check_item.get('op_type', []) + url = check_item.get('url', "") + suggestion = check_item.get('suggestion', "") + + if getattr(op_info, 'op_type', "UNKNOWN").lower() in supported_op_type: + return suggestion if "{}" not in suggestion else suggestion.format(url) + + for check in self.check_rules: + (_, check_rule), = check.items() + self.checker_list.append(partial(_guide_url, check_rule)) + + +AICPU_CHECKER = { + "CommonChecker": CommonChecker, + "ExampleGuideChecker": ExampleGuideChecker +} diff --git 
a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py new file mode 100644 index 0000000000..d1a1384b8d --- /dev/null +++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py @@ -0,0 +1,77 @@ +import logging + +from typing import List + +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.common import constant +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset + +logger = logging.getLogger() + + +class BlockDimChecker(OperatorChecker): + _SUGGESTION: List[str] = [] + _CHECKER = "block dim" + _PROBLEM = "block dim" + _description = "some operator does not make full use of {} ai core" + _ITEMS = [ + "op_name", "op_type", "task_type", "task_duration", "income", "block_dim", "mix_block_dim", "input_shapes", + "input_data_types", "input_formats", "output_shapes", "output_data_types", "output_formats" + ] + + def _check_data(self, data): + if not self._check_summary(data): + return False + if not Config().get_config("ai_core_num"): + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "ai core num in info.json file") + return False + summary = data.op_summary + op_info = summary.op_list[0] + if not hasattr(op_info, "block_dim"): + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "block dim in op summary") + return False + if Config().get_config("ai_core_num"): + self._aicore_num = int(Config().get_config("ai_core_num")) + if Config().get_config("aiv_num"): + self._aiv_num = int(Config().get_config("aiv_num")) + self._description = self._description.format(self._aicore_num) + if self._aiv_num: + self._description += f" or {self._aiv_num} ai vector core" + self._description += f";\n Top-{OperatorChecker._MAX_TUNE_OP_NUM} operator of " \ + "task duration are as follows:\n" + return True + + def make_render(self, html_render, record): + 
html_render.render_template(key="operator", + template_dir="templates", + template_name="operator_block_dim.html", + format_result=self.format_operator_result(record, constant.OPERATOR_OUT_TOPK)) + + def _check_operator(self, op_info) -> bool: + if op_info.task_type not in ["AI_CORE", "AI_VECTOR_CORE", "MIX_AIC"]: + return False + block_dim = int(op_info.block_dim) + core_num = self.get_core_num(op_info) + if block_dim % core_num == 0: + return False + if op_info.task_type == "MIX_AIC" and hasattr(op_info, "mix_block_dim") \ + and self._aiv_num and int(op_info.mix_block_dim) % self._aiv_num == 0: + return False + return True + + def get_core_num(self, op_info): + """ + get core num of task type + """ + if op_info.task_type == "AI_CORE" or not self._aiv_num: + core_num = self._aicore_num + else: + core_num = self._aiv_num + return core_num + + def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: + if profiling_data.PROF_TYPE == constant.ASCEND_PYTORCH_PROFILER: + self._SUGGESTION.append(self.PyTorch_OPERATOR_TUNE_SUGGESTION) + elif profiling_data.PROF_TYPE == constant.MSLITE: + self._SUGGESTION.append(self.MSLite_OPERATOR_TUNE_SUGGESTION) diff --git a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py new file mode 100644 index 0000000000..d919eb7d5f --- /dev/null +++ b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py @@ -0,0 +1,56 @@ +import logging +from typing import List + +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.common import constant +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.utils.utils import to_percent + +logger = logging.getLogger() + + +class OperatorBoundChecker(OperatorChecker): + _MIN_TASK_DURATION = 20 # min task duration 20us + _CHECKER = 
"operator no bound" + _PROBLEM = "operator no bound" + _SUGGESTION: List[str] = [] + _description = ( + f"There is no mte, cube, vector, scalar ratio is more than {to_percent(Config().operator_bound_ratio)};\n" + + f"Top task duration operators need to be tuned are as follows: \n") + _ITEMS = [ + "op_name", "op_type", "task_type", "task_duration", "vec_ratio", "mac_ratio", "scalar_ratio", "mte1_ratio", + "mte2_ratio", "mte3_ratio", "block_dim", "input_shapes", "input_data_types", "input_formats", "output_shapes", + "output_data_types", "output_formats" + ] + + def _check_data(self, data): + if not self._check_summary(data): + return False + for op_info in data.op_summary.op_list: + if self._check_operator(op_info): + return True + + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "ratio in op summary") + return False + + def _check_operator(self, op_info) -> bool: + bound_list = ["vec_ratio", "mac_ratio", "scalar_ratio", "mte1_ratio", "mte2_ratio", "mte3_ratio"] + ratio_list = [self.get_ratio(op_info, attr) for attr in bound_list] + if not any(ratio_list): + return False # no data, skip check + if any(ratio and ratio > Config().operator_bound_ratio for ratio in ratio_list): + return False + return True + + def make_render(self, html_render, record): + html_render.render_template(key="operator", + template_dir="templates", + template_name="operator_no_bound.html", + format_result=self.format_operator_result(record, constant.OPERATOR_OUT_TOPK)) + + def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: + if profiling_data.PROF_TYPE == constant.ASCEND_PYTORCH_PROFILER: + self._SUGGESTION.append(self.PyTorch_OPERATOR_TUNE_SUGGESTION) + elif profiling_data.PROF_TYPE == constant.MSLITE: + self._SUGGESTION.append(self.MSLite_OPERATOR_TUNE_SUGGESTION) diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py new file mode 100644 index 
0000000000..4d405eb918 --- /dev/null +++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py @@ -0,0 +1,82 @@ +import copy +import logging +from typing import List + +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.common import constant +from profiler.advisor.dataset.profiling.info_collection import OpInfo +from profiler.advisor.result.item import OptimizeItem, StatisticsItem, OptimizeRecord + +logger = logging.getLogger() + + +class DynamicShapeChecker(OperatorChecker): + ENABLE_COMPILED_SUGGESTION = "Optimize by enabling compiled operator, such as:\n" \ + "`torch_npu.npu.set_compile_mode(jit_compile=False)`\n" + _SUGGESTION: List[str] = [ENABLE_COMPILED_SUGGESTION] + _CHECKER = "dynamic shape operator" + _PROBLEM = "Dynamic shape operator" + _description = f"Found all operators are dynamic shape" + _op_list: List[OpInfo] = [] + _tune_op_list: List[str] = [] # record op name to be tuned, and save to tune_ops_file.cfg + _op_views: List = [] + + def __init__(self, cann_version) -> None: + super().__init__(cann_version = cann_version) + + def check(self, profiling_database) -> bool: + # CANN 8.0.0 之前从 ge_info 中获取 op_state 属性,进行动态 shape 逻辑判断 + if hasattr(profiling_database, "ge_info") and profiling_database.ge_info: + ge_info = profiling_database.ge_info + static_shape_operators = ge_info.get_static_shape_operators() + if len(static_shape_operators) == 0: + OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True + return True + # CANN 8.0.0 之后 op_state 属性从 op_summary 文件中获取 + elif hasattr(profiling_database, "op_summary"): + static_shape_operators = profiling_database.op_summary.get_static_shape_operators() + if len(static_shape_operators) == 0: + OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True + return True + else: + logger.warning("Skip dynamic shape checker because of not containing ge_info.db file in host filefloder.\n" + "To enable dynamic shape checker, please try to set 
data_simplification=False in experimental_config.\n" + "More details please refer to link : %s", constant.ASCEND_PROFILER_URL) + return False + + def make_record(self, profiling_database) -> OptimizeRecord: + """ + make record for what and how to optimize + """ + + optimization_item = OptimizeItem( + self._PROBLEM, + self._description, + self._SUGGESTION + ) + statistics_item = StatisticsItem("", "", 1) + return OptimizeRecord(optimization_item, statistics_item) + + def format_operator_result(self, record, limit=-1): + """ + Format operator result to html + :param record: profiling check record + :param limit: Limit number of operator statistics lists. + :return: + """ + optimization_item = record.optimization_item + release_suggestion_list = [] + for suggestion in optimization_item.suggestion: + release_suggestion = copy.deepcopy(suggestion) + if release_suggestion == DynamicShapeChecker.ENABLE_COMPILED_SUGGESTION: + release_suggestion += \ + f"for details please refer to link : LINK" + release_suggestion_list.append(release_suggestion.replace('\n', '
')) + format_result = {"record": record.__dict__, "suggestion": '
'.join(release_suggestion_list)} + return format_result + + def make_render(self, html_render, record): + html_render.render_template(key="operator", + template_dir="templates", + template_name="operator_dynamic_shape.html", + format_result=self.format_operator_result(record)) diff --git a/profiler/advisor/analyzer/computation/operator_checker.py b/profiler/advisor/analyzer/computation/operator_checker.py new file mode 100644 index 0000000000..e8490ff206 --- /dev/null +++ b/profiler/advisor/analyzer/computation/operator_checker.py @@ -0,0 +1,282 @@ +import copy +import logging +from textwrap import fill +from typing import List + +from profiler.advisor.common import constant +from profiler.advisor.common.version_control import VersionControl +from profiler.advisor.config.config import Config +from profiler.advisor.dataset.profiling.info_collection import OpInfo +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.result.item import OptimizeItem, StatisticsItem, OptimizeRecord +from profiler.advisor.utils.utils import safe_division + +logger = logging.getLogger() + + +class OperatorChecker(VersionControl): + _SUPPORT_VERSIONS = [constant.CANN_VERSION_C30, constant.CANN_VERSION_C13, constant.CANN_VERSION_C15, constant.CANN_VERSION_C17] + IS_ALL_OPERATOR_DYNAMIC_SHAPE = False + _MAX_TUNE_OP_NUM = constant.OPERATOR_OUT_TOPK + _MIN_TASK_DURATION = 0 + _MIN_TASK_DURATION_RATIO = 1.0 + _MIN_TOTAL_DURATION_RATIO = 1.0 + _CHECKER = str() + _PROBLEM = str() + _description = str() + STACK_INFO_ITEMS = "" + _ITEMS: List[str] = [] + _SUGGESTION: List[str] = [] + SKIP_CHECK_MSG = "Skip %s checker because of not containing %s" + _tune_op_info_list: List[OpInfo] = [] + PyTorch_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE, such as:\n" \ + f"'aoe --job_type=2 --model_path=$user_dump_path " \ + f"--tune_ops_file={Config().tune_ops_file}'\n" + MSLite_OPERATOR_TUNE_SUGGESTION = f"Optimize operator by AOE in mindspore lite 
framework, such as:\n" \ + f"converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \ + f"--modelFile=$user_model.onnx --outputFile=user_model --configFile=./config.txt\n" + _tune_op_list: List[str] = [] + + def __init__(self, cann_version: str): + self.cann_verson = cann_version + self._op_list: List[OpInfo] = [] + + def check(self, profiling_data: ProfilingDataset) -> bool: + """ + check if any operator need optimize + :param profiling_data: profiling datasest + :return: true or false + """ + if not self._check_data(profiling_data): + return False + + summary = profiling_data.op_summary + total_task_duration = 0.0 + max_task_duration = 0.0 + for op_info in summary.op_list: + if not self._check_operator(op_info): + continue + task_duration = float(op_info.task_duration) + total_task_duration += task_duration + max_task_duration = max(max_task_duration, task_duration) + self._op_list.append(op_info) + if task_duration > self._MIN_TASK_DURATION: + self._tune_op_info_list.append(op_info) + + if any([ + max_task_duration > self._MIN_TASK_DURATION, + round(safe_division(max_task_duration, summary.get_total_task_duration()), + 4) > self._MIN_TASK_DURATION_RATIO, + round(safe_division(total_task_duration, summary.get_total_task_duration()), 4) > + self._MIN_TOTAL_DURATION_RATIO, + ]): + self._op_list.sort(key=lambda x: float(x.get_attr("task_duration")), reverse=True) + self._tune_op_info_list.sort(key=lambda x: float(x.get_attr("task_duration")), reverse=True) + for op in self._op_list: + if op.op_name not in self._tune_op_list and len(self._tune_op_list) < constant.OPERATOR_OUT_TOPK: + self._tune_op_list.append(op.op_name) + return True + return False + + def make_record(self, profiling_data: ProfilingDataset): + """ + Make record for what and how to optimize + :param profiling_data: profiling data + :return: optimize record + """ + task_duration_list = [float(op_info.get_attr("task_duration")) for op_info in self._op_list if + hasattr(op_info, 
"get_attr")] + total_cost_time = sum(task_duration_list) + total_task_duration = profiling_data.op_summary.get_total_task_duration() + count = len(task_duration_list) + statistics_item = StatisticsItem(total_task_duration, total_cost_time, count, self.get_incomes()) + optimization_item = OptimizeItem( + self._PROBLEM, + self._get_description(self._description, self.get_op_type_list(self._op_list)[:self._MAX_TUNE_OP_NUM]), + self._SUGGESTION + ) + return OptimizeRecord(optimization_item, statistics_item) + + def _get_description(self, description, op_type_list=None): + if not op_type_list: + return description + + desc_suffix = [] + for i in range(len(op_type_list)): + if i % 3 == 0 and i != 0: + desc_suffix.append("\n") + + desc_suffix.append(f"{op_type_list[i]}") + + if i < len(op_type_list) - 1: + desc_suffix.append(", ") + + description += "".join(desc_suffix) + return description + + def pre_check(self, profiling_data) -> bool: + self.format_suggestion_content(profiling_data) + return not (OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE and ( + OperatorChecker.PyTorch_OPERATOR_TUNE_SUGGESTION or OperatorChecker.MSLite_OPERATOR_TUNE_SUGGESTION + ) in self._SUGGESTION) + + def format_operator_result(self, record, limit): + """ + Format operator result to html + :param record: profiling check record + :param limit: Limit number of operator statistics lists. 
+ :return: + """ + optimization_item = record.optimization_item + release_suggestion_list = [] + for suggestion in optimization_item.suggestion: + release_suggestion = copy.deepcopy(suggestion) + if release_suggestion == OperatorChecker.PyTorch_OPERATOR_TUNE_SUGGESTION: + release_suggestion += \ + (f"for details please refer to link : LINK") + elif release_suggestion == OperatorChecker.MSLite_OPERATOR_TUNE_SUGGESTION: + release_suggestion += \ + (f"\nThe config file for MSLite AOE usage is as follows:\n" \ + f"[ascend_context]\n" \ + f"aoe_mode=\"operator tuning\"\n" \ + f"--tune_ops_file={Config().tune_ops_file}\n" + f"\nFor details please refer to link : LINK") + release_suggestion_list.append(release_suggestion.replace('\n', '
')) + format_result = {"record": record.__dict__, + "suggestion": fill('
'.join(release_suggestion_list), width=200), + "task_duration": round(record.statistics_item.task_duration, 2)} + statistic = self.group_by(copy.deepcopy(self._op_list), limit=limit) + format_result["statistic"] = statistic + return format_result + + def group_by(self, op_list, op_key="op_type", + limit: int = constant.OPERATOR_LIST_UNLIMIT): + """ + group by Profiling.OpInfo's attribute key, then return top limit tuple by duration + :param op_list: input a OpInfo list + :param op_key: group by Profiling.OpInfo's attribute key + :param limit: top limit num, if you do not need to limit the length of tuple, input -1(int) + :return: + """ + if op_list is None: + op_list = [] + statistic = {} # str, json + for op_info in op_list: + if statistic.get(op_info.get_attr(op_key)): + statistic[op_info.get_attr(op_key)]["summary"]["total_duration"] = float( + statistic[op_info.get_attr(op_key)]["summary"]["total_duration"]) + float( + op_info.get_attr("task_duration", constant.DEFAULT_DURATION_ZERO)) + statistic[op_info.get_attr(op_key)]["summary"]["counts"] += 1 + stack_info = op_info.get_attr("stack_info") + if stack_info: + op_info.stack_info = stack_info.replace('\r\n', '
') + statistic[op_info.get_attr(op_key)]["op_info_list"].append(op_info) + else: + statistic[op_info.get_attr(op_key)] = {"summary": {}, "op_info_list": []} + statistic[op_info.get_attr(op_key)]["summary"]["op_type"] = op_info.get_attr( + "op_type", constant.DEFAULT_OPERATOR_TYPE) + statistic[op_info.get_attr(op_key)]["summary"]["total_duration"] = float( + op_info.get_attr("task_duration", constant.DEFAULT_DURATION_ZERO)) + statistic[op_info.get_attr(op_key)]["summary"]["counts"] = 1 + stack_info = op_info.get_attr("stack_info") + if stack_info: + op_info.stack_info = stack_info.replace('\r\n', '
') + statistic[op_info.get_attr(op_key)]["op_info_list"] = [op_info] + + if statistic: + for op_key in statistic.keys(): + statistic[op_key]["summary"]["total_duration"] = round( + statistic[op_key]["summary"]["total_duration"], 2) + # Grouped by op_type, sorted by total_duration, and obtained the top 10 operators that take the most time. + if limit > 0: + statistic = sorted( + statistic.items(), key=lambda kv: kv[1]["summary"]["total_duration"], reverse=True)[:limit] + else: + statistic = sorted(statistic.items(), key=lambda kv: kv[1]["summary"]["total_duration"], reverse=True) + else: + logger.warning("%s checker do not has results to format html", str(self.__class__.__name__)) + return statistic + + def _check_data(self, profiling_data): + return True + + def _check_operator(self, op_info): + return False + + def _get_income(self, _op_info: OpInfo) -> float: + return 0 + + def get_tune_op_list(self): + """ + get tune op list + :return: tune op list + """ + return self._tune_op_list + + def get_views(self, _graph_data): + """Get node views.""" + return [] + + @classmethod + def get_name(cls): + """ + get name of checker + :return: checker name + """ + return cls._PROBLEM + + def get_incomes(self) -> float: + """get incomes""" + incomes = 0.0 + for op_info in self._op_list: + income = self._get_income(op_info) + setattr(op_info, "income", round(income, 2)) + incomes += income + return incomes + + def get_op_type_list(self, op_list: List[OpInfo]): + """get op type list""" + op_type_list = [] + for op_info in op_list: + if op_info.op_type not in op_type_list: + op_type_list.append(op_info.op_type) + return op_type_list + + def _check_summary(self, data: ProfilingDataset): + if not data.op_summary: + logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "op summary") + return False + return True + + @staticmethod + def get_ratio(op_info: OpInfo, attr: str) -> float: + if not op_info.has_attr(attr): + return 0 + value = op_info.get_attr(attr) + if not value or value == 
"N/A": + return 0 + return float(value) + + def get_details(self) -> list: + """ + get details of operator to be optimized + :return: detail list + """ + op_list = self._op_list + if not op_list or not (self._ITEMS + [self.STACK_INFO_ITEMS]): + return [] + details = [] + attrs = [attr for attr in (self._ITEMS + [self.STACK_INFO_ITEMS]) if op_list[0].has_attr(attr)] + details.append(attrs) + op_list = sorted(op_list, key=lambda x: float(x.get_attr("task_duration")), reverse=True) + for op_info in op_list: + content = [ + op_info.get_attr(attr) if attr != "aicore_time" + else op_info.get_float_attr(attr, strict_mode=True) + + op_info.get_float_attr("aiv_time", strict_mode=True) for attr in attrs + ] + details.append(content) + return details + + def format_suggestion_content(self, profiling_data: ProfilingDataset) -> None: + return diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py new file mode 100644 index 0000000000..98d3c5c49b --- /dev/null +++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py @@ -0,0 +1,71 @@ +import logging +from abc import ABC +from typing import Dict, List + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.common import constant +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.analyzer.computation.aicpu.aicpu_checker import AicpuChecker +from profiler.advisor.analyzer.computation.bound.block_dim_checker import BlockDimChecker +from profiler.advisor.analyzer.computation.bound.operator_bound_checker import OperatorBoundChecker +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.analyzer.computation.op_compile.dynamic_shape_checker import DynamicShapeChecker +from profiler.advisor.analyzer.computation.operator_checker import OperatorChecker +from profiler.advisor.display.html.render import HTMLRender +from 
class ProfilingAnalyzer(BaseAnalyzer, ABC):
    """Run every supported operator checker over a profiling dataset.

    Checkers are discovered with ``get_supported_subclass(OperatorChecker, cann_version)``
    and their records, detail rows and tunable operators are collected into a
    single ``OptimizeResult``.
    """
    dataset_cls_list = [ProfilingDataset]

    def __init__(self, collection_path, **kwargs) -> None:
        """
        :param collection_path: profiling directory forwarded to ``BaseAnalyzer``
        :param kwargs: optional ``cann_version`` / ``torch_version`` plus any
            other option understood by ``BaseAnalyzer``
        """
        cann_version = kwargs.get("cann_version", constant.DEFAULT_CANN_VERSION)
        torch_version = kwargs.get("torch_version", constant.DEFAULT_TORCH_VERSION)
        # Merge the resolved versions into kwargs instead of passing them next
        # to **kwargs: a caller that supplies either key explicitly would
        # otherwise trigger "got multiple values for keyword argument".
        base_kwargs = dict(kwargs, cann_version=cann_version, torch_version=torch_version)
        super().__init__(collection_path, **base_kwargs)
        self.checker_list = [checker(cann_version) for checker in get_supported_subclass(OperatorChecker, cann_version)]
        # Put the dynamic shape checker first: AOE operator tuning is not
        # supported for dynamic shapes yet, so the AOE tuning checker can be
        # skipped in that case.
        index = next((i for i, item in enumerate(self.checker_list) if isinstance(item, DynamicShapeChecker)), None)
        if index is not None:
            # The checker may be absent for the requested CANN version;
            # pop(None) would raise TypeError.
            self.checker_list.insert(0, self.checker_list.pop(index))
        self.html_render = HTMLRender()
        self.result = OptimizeResult()

    @BaseAnalyzer.check_data((ProfilingDataset.get_key(),))
    def optimize(self) -> OptimizeResult:
        """
        Run all checkers against the first profiling dataset.

        :return: the accumulated optimize result
        """
        profiling_data = self.get_first_data_by_key(self.dataset_list, ProfilingDataset.get_key())
        for checker in self.checker_list:
            if not checker.pre_check(profiling_data):
                continue
            if not checker.check(profiling_data):
                continue
            # add record
            record = checker.make_record(profiling_data)
            checker.make_render(self.html_render, record)
            self.result.add(record)
            # add details; the first row is the header
            details = checker.get_details()
            if details:
                header, *rows = details
                self.result.add_detail(checker.get_name(), headers=header)
                for row in rows:
                    self.result.add_detail(checker.get_name(), detail=row)
            # add tune op list
            tune_op_list = checker.get_tune_op_list()
            if tune_op_list:
                self.result.add_tune_op_list(tune_op_list)

        return self.result

    def make_record(self):
        """Records are produced by the individual checkers; nothing to do here."""
        pass

    def make_render(self):
        """Rendering is delegated to the individual checkers; nothing to do here."""
        pass
class OpStackFinder:
    """Find the code call stacks of operators recorded in a timeline dataset."""

    def __init__(self):
        self.n_processes = get_analyze_processes()
        # Rows of [task_id, op_name, task_type, stack or no-stack reason].
        self._stack_record = []
        # Dataset index (or a no-stack code) -> list of [task_id, op_name, task_type].
        self._task_id_record = {}
        self.op_name = None
        self.task_type = None
        self.matched_index = set()

    def get_api_stack_by_op(self, event_dataset: TimelineEventDataset, op_name: List[str] = None, task_type: str = None,
                            disable_multiprocess=False):
        """
        :Param event_dataset: dataset of timeline event
        :Param op_name: operator name, e.g. IndexPutV2
        :Param task_type: operator task type, optionals are AI_CPU and AI_CORE
        :Param disable_multiprocess: disable multiprocessing, avoid cost time of enable new process for light task
        """
        if not op_name:
            op_name = []
        if not isinstance(op_name, list):
            op_name = [op_name]

        self.op_name = ",".join(op_name)
        self.task_type = task_type
        # Without an explicit filter every operator name known to the dataset is searched.
        op_name_list = event_dataset.task_op_names if not op_name else op_name

        if self.n_processes <= 1 or disable_multiprocess:
            self._query_stacks_multiprocess(event_dataset, op_name_list, task_type)
        else:
            event_num_per_process = int(len(op_name_list) / self.n_processes) + 1
            parallel_analyzer = ParallelJob(
                self._query_stacks_multiprocess,
                [[event_dataset, op_name_list[i:i + event_num_per_process], task_type]
                 for i in range(0, len(op_name_list), event_num_per_process)],
                job_name="Analyzing operator stacks from timeline"
            )
            parallel_analyzer.start(self.n_processes)
        self.query_stack(event_dataset)

    def make_record(self, result: OptimizeResult):
        """
        make record for what and how to optimize
        """
        if not self._stack_record:
            return

        desc = f"Found {len(self._stack_record)} called stacks for"
        if self.op_name and self.task_type:
            desc += f" operators with name '{self.op_name}' with task type '{self.task_type}'"
        elif self.op_name and not self.task_type:
            desc += f" operators with name '{self.op_name}'"
        elif self.task_type and not self.op_name:
            desc += f" operators with task type '{self.task_type}'"
        else:
            desc += " all operators"

        suggestion = "Please use command 'ma-advisor analyze profiling' to analyze operators"
        optimization_item = OptimizeItem(
            "Operator stacks",
            desc,
            [suggestion]
        )
        result.add(OptimizeRecord(optimization_item))

        record_title = ["Task ID", "op name", "op type", "code stacks"]
        result.add_detail('operator stacks', headers=record_title)

        for op_info in self._stack_record:
            result.add_detail('operator stacks', detail=op_info)

    def _get_api_stack_by_op(self, event_dataset: TimelineEventDataset, op_name: str, task_type: str):
        """Record, for every matching device op, which dataset index holds its stack."""
        for _, src_op_event in event_dataset.ops_with_task_type.items():

            op_task_type = src_op_event.get(const.TASK_TYPE)
            if not (src_op_event.name == op_name and op_task_type and op_task_type == task_type):
                continue

            # The flow start event may be keyed with or without the tid, and
            # with or without the decimal point in the timestamp.
            torch_to_npu_key = f"s-{src_op_event.tid}-{src_op_event.ts}"
            torch_to_npu_event = event_dataset.torch_to_npu.get(torch_to_npu_key) or event_dataset.torch_to_npu.get(
                f"s-{src_op_event.ts}") or event_dataset.torch_to_npu.get(f"s-{src_op_event.ts.replace('.', '')}")

            acl_to_npu_event = src_op_event.ts in event_dataset.acl_to_npu

            if not torch_to_npu_event and not acl_to_npu_event:
                continue

            # query stack by torch_to_npu first, due to each operator had acl_to_npu incoming flow in cann6.3
            if torch_to_npu_event:
                dst_op_index = self._query_index_by_torch_to_npu(event_dataset, torch_to_npu_event)
            else:
                dst_op_index = self._query_index_by_acl_to_npu(acl_to_npu_event)

            # Compare against None explicitly: 0 is a legitimate dataset index
            # and must not be discarded by a truthiness test.
            if dst_op_index is None:
                continue

            task_id = src_op_event.task_id
            if not task_id:
                continue
            self.matched_index.add(dst_op_index)
            self._task_id_record.setdefault(dst_op_index, []).append([task_id, op_name, task_type])

    def _query_index_by_torch_to_npu(self, event_dataset, torch_to_npu_event):
        """Return the dataset index of the host op the flow points at, or the backward no-stack code."""
        dst_op_event_key = torch_to_npu_event.ts
        dst_op_event = event_dataset.ops_with_stack.get(dst_op_event_key)

        if not dst_op_event:
            # NOTE(review): rows stored under this code are never emitted by
            # query_stack(); confirm whether they should be reported too.
            return const.TIMELINE_BACKWARD_NO_STACK_CODE

        return dst_op_event.get("dataset_index")

    def _query_index_by_acl_to_npu(self, acl_to_npu_event):
        """Return the acl_to_npu no-stack code when the op has such an incoming flow, else None."""
        if acl_to_npu_event:
            return const.TIMELINE_ACL_TO_NPU_NO_STACK_CODE
        return None

    def _query_stacks_multiprocess(self, event_dataset, op_name_list, task_type):
        """Match every name in op_name_list; with no task type both AI_CORE and AI_CPU are tried."""
        for op_name in op_name_list:
            if task_type is not None:
                self._get_api_stack_by_op(event_dataset, op_name, task_type)
            else:
                self._get_api_stack_by_op(event_dataset, op_name, const.AI_CORE)
                self._get_api_stack_by_op(event_dataset, op_name, const.AI_CPU)

    def _format_stack_record(self):
        """Flatten ``_task_id_record`` into rows of [key, *recorded entries]."""
        stack_list = []
        for task_id, stack_info in self._task_id_record.items():
            stack_list.append([task_id, *stack_info])
        return stack_list

    def _query_stack_by_matched_index(self, index, event):
        """Callback for the dataset scan: attach the stack of a matched event to its ops."""
        if index not in self.matched_index:
            return None
        event = TimelineEvent(event)
        stack = event.args.get(const.CALL_STACKS)
        stack = stack if stack else const.NO_STACK_REASON_MAP.get(const.TIMELINE_BACKWARD_NO_STACK_CODE)
        for matched_op_info in self._task_id_record.get(index, []):
            self._stack_record.append([*matched_op_info, stack])
        return None

    def query_stack(self, event_dataset: TimelineEventDataset):
        """Scan the dataset once and fill ``_stack_record`` for every matched operator."""
        if not event_dataset.dataset_len:
            return
        _ = event_dataset.parse_data_with_generator(self._query_stack_by_matched_index)

        # Operators reached only through acl_to_npu have no stack event of
        # their own. Report them exactly once here; doing it inside the
        # per-event callback repeated them for every matched event.
        acl_reason = const.NO_STACK_REASON_MAP.get(const.TIMELINE_ACL_TO_NPU_NO_STACK_CODE)
        for matched_op_info in self._task_id_record.get(const.TIMELINE_ACL_TO_NPU_NO_STACK_CODE, []):
            self._stack_record.append([*matched_op_info, acl_reason])
class GeInfo(ProfilingParser):
    """
    ge info file
    """
    FILE_PATTERN = r"ge_info.db"
    FILE_PATTERN_MSG = "ge_info.db"
    FILE_INFO = "ge info"
    STATIC_OP_STATE = "0"
    DYNAMIC_OP_STATE = "1"

    def __init__(self, path: str) -> None:
        super().__init__(path)
        # Rows of (op_name, op_state); stays None until TaskInfo has been read.
        self.op_state_info_list = None

    def parse_from_file(self, profiling_db_file):
        """
        Load (op_name, op_state) rows from the TaskInfo table of ge_info.db.

        :param profiling_db_file: full path of the sqlite database
        :return: False when the database file is missing, True otherwise
            (also when the TaskInfo table does not exist)
        """
        db_path, db_file = os.path.split(profiling_db_file)
        if not ConnectionManager.check_db_exists(db_path, [db_file]):
            return False
        conn = ConnectionManager(db_path, db_file)
        if conn.check_table_exists(['TaskInfo']):
            with conn().connect() as sql_conn:
                self.op_state_info_list = sql_conn.execute(text("select op_name, op_state from TaskInfo")).fetchall()
        return True

    def _operators_in_state(self, op_state: str) -> List[Any]:
        """Return the names of all operators whose recorded state equals op_state."""
        # The list is still None when parsing was skipped or TaskInfo is
        # missing; treat that as "no operators" instead of raising TypeError.
        return [op for op, state in (self.op_state_info_list or []) if state == op_state]

    def get_static_shape_operators(self) -> List[Any]:
        """Return the names of operators recorded with the static shape state."""
        return self._operators_in_state(self.STATIC_OP_STATE)

    def get_dynamic_shape_operators(self) -> List[Any]:
        """Return the names of operators recorded with the dynamic shape state."""
        return self._operators_in_state(self.DYNAMIC_OP_STATE)
class TaskChecker:
    """
    check task info
    """

    def __init__(self):
        # (pid, tid) pairs on which an event named "*_SQE" has been seen.
        self.sqe_keys = set()

    def is_sqe(self, task: TaskInfo) -> bool:
        """Register "*_SQE" events and report whether task follows one on the same (pid, tid)."""
        key = (task.pid, task.tid)
        if task.args.get('name', '').endswith('_SQE'):
            self.sqe_keys.add(key)
            return False

        return key in self.sqe_keys


class Msprof(ProfilingParser):
    """
    msprof

    """
    FILE_PATTERN = r"^msprof[_\d]+.json$"
    FILE_PATTERN_MSG = "msprof_*.json"
    FILE_INFO = "msprof"

    def __init__(self, path: str) -> None:
        super().__init__(path)
        self._tasks: List[TaskInfo] = []
        self._iteration_time = 0.0
        self._model_id = None
        self._iteration_id = None
        self._process_pid: Dict[str, str] = {}
        self._min_time = 0.0
        self._max_time = 0.0
        self._data_process_time = 0.0
        self._start_point = 0.0

    def parse_from_file(self, file: str):
        """
        Parse an msprof timeline json into tasks and iteration timing.

        :param file: path of the msprof json file
        :return: True when at least one task was kept, otherwise False
        """
        if not self._parse_json(file):
            return False
        min_time = float('inf')
        max_time = 0.0
        # Pre-bind dur: it is read after the loop and was unbound whenever no
        # event reached the assignment below (e.g. empty raw data).
        dur = 0.0
        task_checker = TaskChecker()
        is_iter = False
        for item in self._raw_data:
            task = TaskInfo(item)
            if task.cat == "Iteration Time":
                self._min_time = task.start_time
                self._max_time = task.end_time
                self._iteration_time = task.dur
                is_iter = True
            if task.cat == "Data_aug Bound" and "Data_aug Bound(us)" in task.args:
                self._data_process_time = task.args["Data_aug Bound(us)"]

            if self._start_point == 0 and task.start_time > 0:
                self._start_point = task.start_time

            if task_checker.is_sqe(task):
                continue

            self._tasks.append(task)
            self._parse_task(task)

            start_time = task.start_time
            dur = task.dur
            if start_time == -1 or dur == -1 or dur == 0:
                continue
            if start_time < min_time:
                min_time = start_time
            end_time = start_time + dur
            if end_time > max_time:
                max_time = end_time
        if not is_iter:
            # NOTE(review): this keeps the duration of the last kept task, not
            # max_time - min_time; confirm that this is the intended fallback.
            self._iteration_time = dur
            self._max_time = max_time
            self._min_time = min_time
        return bool(self._tasks)

    def _parse_task(self, task):
        """Pick iteration id, model id and process-name -> pid mapping out of a task."""
        if "Iteration Refresh" in task.name:
            self._iteration_id = task.args.get("Iteration ID")
        elif "Model ID" in task.name:
            self._model_id = int(task.name.split(":")[1])
        elif "process_name" == task.name:
            self._process_pid[task.args.get("name")] = task.pid

    @property
    def step_time(self):
        """Iteration time plus data processing time."""
        return self._iteration_time + self._data_process_time

    @property
    def iteration_time(self):
        """Iteration time taken from the timeline."""
        return self._iteration_time

    @property
    def iter_max_time(self):
        """Latest end time recorded for the iteration."""
        return self._max_time

    @property
    def iter_min_time(self):
        """Earliest start time recorded for the iteration."""
        return self._min_time

    @property
    def data_process_time(self):
        """Value of "Data_aug Bound(us)" when the timeline provides it."""
        return self._data_process_time

    @property
    def tasks(self):
        """Tasks kept after SQE filtering."""
        return self._tasks

    @property
    def model_id(self):
        """Model id parsed from a "Model ID" event name, or None."""
        return self._model_id

    @property
    def iteration_id(self):
        """Iteration id taken from an "Iteration Refresh" event, or None."""
        return self._iteration_id

    @property
    def process_pid(self):
        """Mapping of process name to pid."""
        return self._process_pid

    def __len__(self):
        return len(self._tasks)

    @property
    def start_point(self):
        """First positive start time seen in the raw data."""
        return self._start_point
class OpSummary(ProfilingParser):
    """
    op summary
    """

    FILE_PATTERN = r"^op_summary_[_\d]+\.csv$"
    FILE_PATTERN_MSG = "op_summary_*.csv"
    FILE_INFO = "op summary"
    STATIC_OP_STATE = "static"
    DYNAMIC_OP_STATE = "dynamic"

    def __init__(self, path: str) -> None:
        super().__init__(path)
        self.op_list: List[OpInfo] = []
        self._total_task_duration = 0.0
        self._total_task_wait_time = 0.0
        self._raw_data: List[List[str]] = []

    def parse_from_file(self, file: str):
        """
        Build one OpInfo per csv row and accumulate the duration totals.

        :param file: path of the op summary csv
        :return: True when at least one operator row was parsed
        """
        if not self._parse_csv(file):
            return False
        if not self._raw_data:
            # An empty csv has no header row; indexing it would raise IndexError.
            logger.error("No valid op info in %s", file)
            return False
        title_dict = dict(enumerate(self._raw_data[0]))
        for op_data in self._raw_data[1:]:
            op_info = OpInfo()
            for idx, value in enumerate(op_data):
                title = title_dict.get(idx, "")
                formatted_title = format_excel_title(title)
                # Start times given in "us" are multiplied by 1000, using
                # Decimal so that no precision is lost.
                if formatted_title == 'task_start_time' and 'us' in title and \
                        value.replace('.', '').replace("E+", "").isnumeric():
                    value = str(Decimal(value) * Decimal(1000))
                op_info.add_attr(formatted_title, value)
            self.op_list.append(op_info)
            self._total_task_duration += self.get_float(op_info.get_attr("task_duration"))
            self._total_task_wait_time += self.get_float(op_info.get_attr("task_wait_time"))
        if not self.op_list:
            logger.error("No valid op info in %s", file)
            return False
        return True

    def _operators_in_state(self, op_state: str) -> List[Any]:
        """Return the op_name of every operator whose op_state equals op_state."""
        return [op_info.get_attr("op_name") for op_info in self.op_list if op_info.get_attr("op_state") == op_state]

    def get_static_shape_operators(self) -> List[Any]:
        """Return the names of operators whose op_state is "static"."""
        return self._operators_in_state(self.STATIC_OP_STATE)

    def get_dynamic_shape_operators(self) -> List[Any]:
        """Return the names of operators whose op_state is "dynamic"."""
        return self._operators_in_state(self.DYNAMIC_OP_STATE)

    def get_total_task_duration(self):
        """
        get total task duration of all operators
        :return: sum of the task_duration column
        """
        return self._total_task_duration

    @lazy_property
    def task_dict(self):
        """
        Group the parsed operators by op_name.
        """
        task_dict = {}
        for op_info in self.op_list:
            task_dict.setdefault(op_info.op_name, []).append(op_info)

        return task_dict
AICPU_TASK_TYPE = "AI_CPU"
AICORE_TASK_TYPE = "AI_CORE"


class TaskTime(ProfilingParser):
    """
    task time info
    """

    FILE_PATTERN = r"^task_time_[_\d]+\.json$"
    FILE_PATTERN_MSG = "task_time*.json"
    FILE_INFO = "task time"

    def __init__(self, path: str) -> None:
        super().__init__(path)
        self._tasks: List[TaskInfo] = []
        self._aicore_tasks: List[TaskInfo] = []
        self._aicpu_tasks: List[TaskInfo] = []
        self._process_map: Dict[str, str] = {}
        self._pid_map: Dict[str, str] = {}

    def get_aicpu_tasks(self):
        """
        get aicpu tasks
        :return: aicpu tasks
        """
        return self._aicpu_tasks

    def get_aicore_tasks(self):
        """
        get aicore tasks
        :return: aicore tasks
        """
        return self._aicore_tasks

    def parse_from_file(self, file: str):
        """
        Split the task time json into all / AI_CORE / AI_CPU task lists.

        :param file: path of the task time json
        :return: True when at least one non-metadata event was found
        """
        if not self._parse_json(file):
            return False
        # First pass: metadata ("M") events named process_name give the
        # pid <-> process name mapping.
        for item in self._raw_data:
            if item.get("ph") != "M":  # header
                continue
            if item.get("name") != "process_name":
                continue
            pname = (item.get("args") or {}).get("name")
            if pname is None:
                # A malformed metadata event must not abort the whole parse.
                continue
            pid = item.get("pid")
            self._process_map[pid] = pname
            self._pid_map[pname] = pid
        # Second pass: everything else is a task; only tasks of the
        # "Task Scheduler" process are classified by task type.
        for item in self._raw_data:
            if item.get("ph") == "M":  # header
                continue
            task = TaskInfo(item)
            self._tasks.append(task)
            if task.pid != self._pid_map.get("Task Scheduler"):
                continue
            if task.task_type == AICORE_TASK_TYPE:
                self._aicore_tasks.append(task)
            elif task.task_type == AICPU_TASK_TYPE:
                self._aicpu_tasks.append(task)
        self._aicore_tasks.sort(key=lambda x: x.start_time)
        self._aicpu_tasks.sort(key=lambda x: x.start_time)
        if not self._tasks:
            logger.error("No valid task info in %s", file)
            return False
        return True
+ [ msprof, task_time ] + host: + sqlite: + [ ge_info ] + class_attr: + op_summary: OpSummary + task_time: TaskTime + msprof: Msprof + ge_info: GeInfo + file_attr: + op_summary: ^op_summary_\d+_\d+_\d{14}\.csv$ + task_time: ^task_time_\d+_\d+_\d{14}\.json$ + msprof: ^msprof_\d+_\d+_\d{14}\.json$ + ge_info: ge_info.db + + - version: 7.0.RC1 + dirs_pattern: + ^PROF_\d{6}_\d{17}_\w+$: + ^device_\d+$: + summary: + [ op_summary ] + timeline: + [ msprof, task_time ] + host: + sqlite: + [ ge_info ] + class_attr: + op_summary: OpSummary + task_time: TaskTime + msprof: Msprof + ge_info: GeInfo + file_attr: + op_summary: ^op_summary_\d+_\d+_\d+_\d{14}\.csv$ + task_time: ^task_time_\d+_\d+_\d+_\d{14}\.json$ + msprof: ^msprof_\d+_\d+_\d+_\d{14}\.json$ + ge_info: ge_info.db + + - version: 6.3.RC2 + dirs_pattern: + ^PROF_\d{6}_\d{17}_\w+$: + ^device_\d+$: + summary: + [ op_summary ] + timeline: + [ msprof, task_time ] + host: + sqlite: + [ ge_info ] + class_attr: + op_summary: OpSummary + task_time: TaskTime + msprof: Msprof + ge_info: GeInfo + file_attr: + op_summary: ^op_summary_\d+_\d+_\.csv$ + task_time: ^task_time_\d+_\d+_\.json$ + msprof: ^msprof_\d+_\d+_\.json$ + ge_info: ge_info.db + + diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index 4db50464ef..b8daedab08 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -18,8 +18,6 @@ class ClusterDataset(Dataset): def __init__(self, collection_path, data: dict, **kwargs) -> None: super().__init__(collection_path, data) - if not self.is_cluster_analysis_output_exist(): - self.cluster_analyze() def is_cluster_analysis_output_exist(self): """ @@ -34,6 +32,8 @@ class ClusterDataset(Dataset): return False def cluster_analyze(self): + if self.is_cluster_analysis_output_exist(): + return parameter = { Constant.COLLECTION_PATH: self.collection_path, Constant.ANALYSIS_MODE: "all" @@ -70,6 +70,7 
class ProfilingBuilderBase:
    """
    Base for builders that attach parsed profiling data to themselves.
    """
    DATA_LIST: List[Dict] = []

    def __init__(self, path) -> None:
        self._path = path

    def parse_data(self) -> bool:
        """
        Instantiate and run every parser described in DATA_LIST.

        :return: True when at least one parser succeeded (always True for
            instances that are themselves a ProfilingParser)
        """
        if isinstance(self, ProfilingParser):
            return True
        parsed_any = False
        for entry in self.DATA_LIST:
            parser_cls = entry.get("class_name")
            if parser_cls is None:
                continue
            subdir = entry.get("subdir_name")
            target = join_prof_path(self._path, subdir) if subdir else self._path
            parser = parser_cls(target)
            if parser.parse_data():
                # Expose the successful parser under its configured attribute name.
                setattr(self, str(entry.get("attr_name")), parser)
                parsed_any = True
        return parsed_any
class ConnectionManager:
    """
    Connection Manager
    """

    def __init__(self, path, db_name):
        self.db_path = os.path.join(path, db_name)
        self.connection = create_engine(f'sqlite:///{self.db_path}')
        self.metadata = MetaData()
        self.metadata.reflect(bind=self.connection)

    def __call__(self, *args, **kwargs):
        return self.connection

    @staticmethod
    def _as_name_list(names) -> List[str]:
        """Return names as a list, wrapping a single string instead of iterating its characters."""
        if isinstance(names, str):
            return [names]
        return list(names)

    @staticmethod
    def check_db_exists(db_path: str, dbs: List) -> bool:
        """
        Check that every database file exists in a readable directory.

        :param db_path: directory expected to hold the database files
        :param dbs: database file names (a single name is accepted too)
        :return: True when db_path is a directory containing all names
        """
        if not os.path.isdir(db_path):
            return False
        names = ConnectionManager._as_name_list(dbs)
        if not names:
            return True
        if not os.access(db_path, os.R_OK):
            return False
        # List the directory once rather than once per requested name.
        existing = set(os.listdir(db_path))
        return all(name in existing for name in names)

    def check_table_exists(self, tables: List) -> bool:
        """
        Check that every table is present in the reflected metadata.
        """
        return all(table in self.metadata.tables for table in tables)

    def check_column_exists(self, table_name: str, columns: List) -> bool:
        """
        Check that table_name exists and has every listed column.
        """
        if table_name not in self.metadata.tables:
            return False
        table_columns = self.metadata.tables[table_name].columns
        return all(column in table_columns for column in columns)

    @classmethod
    def get_connection(cls, path, dbs, tables=None, is_host=False):
        """
        Open a connection when the database (and optionally its tables) exist.

        :param path: directory holding the database
        :param dbs: database file name, or a list of names that must all
            exist; the first one is opened
        :param tables: tables that must be present, if any
        :param is_host: look under the sibling "host" directory instead of
            the "device_<n>" directory contained in path
        :return: a ConnectionManager, or None when a requirement is not met
        """
        if is_host:
            # "+" so that multi-digit ids such as /device_12 are replaced
            # completely instead of leaving "/host2" behind.
            path = re.sub(r"/device_[0-9]+", "/host", path)
        db_names = cls._as_name_list(dbs)
        if not db_names or not cls.check_db_exists(path, db_names):
            return None
        # The constructor takes one file name; handing it the whole list made
        # os.path.join raise TypeError.
        conn = cls(path, db_names[0])
        if tables and not conn.check_table_exists(tables):
            return None
        return conn
class DeviceInfoParser:
    """
    Reads device facts from the info.json.* files of a profiling directory.

    Values stored in Config:
    device_id    device id
    aiv_num      number of AI vector cores
    ai_core_num  number of AI cores
    """
    DATA_LIST = []

    def __init__(self, path) -> None:
        self._path = path

    def parse_data(self) -> bool:
        """
        parse profiling data
        :return: True as soon as one info file was parsed successfully
        """
        candidates = get_file_path_from_directory(self._path, lambda x: x.startswith("info.json."))
        if not candidates:
            return False
        return any(self._parse(candidate) for candidate in candidates)

    @staticmethod
    def _parse(info_file: str) -> bool:
        """Load one info file into Config; True only when ai_core_num was found."""
        if info_file.endswith("done"):
            return False  # skip info.json.0.done
        try:
            with open(info_file, encoding="utf-8") as file:
                info = json.load(file)
        except (IOError, ValueError) as error:
            logger.error("Parse json info file %s failed : %s", info_file, error)
            return False
        if "DeviceInfo" not in info:
            logger.error("No device info in json info file %s", info_file)
            return False
        config = Config()
        for device_info in info["DeviceInfo"]:
            for source_key, config_key in (("id", "device_id"), ("aiv_num", "aiv_num")):
                if source_key in device_info:
                    config.set_config(config_key, device_info[source_key])
            if "ai_core_num" in device_info:
                config.set_config("ai_core_num", device_info["ai_core_num"])
                return True
        logger.error("No ai_core_num in json info file %s", info_file)
        return False
class Info:
    """
    Attribute bag for one profiling record.

    Attributes are stored as plain instance attributes. Subclasses may list
    name prefixes in ``_attr_pre_fix_list`` that are tried in order when an
    attribute is looked up in non-strict mode.
    """
    _attr_pre_fix_list = [""]

    def add_attr(self, key: str, value: str):
        """
        Store value under key unless key is empty or already present.

        :param key: attribute name
        :param value: attribute value
        :return: None
        """
        if not key or hasattr(self, key):
            return
        setattr(self, key, value)

    def has_attr(self, key: str, strict_mode=False):
        """
        Check whether an attribute exists.

        :param key: attribute name
        :param strict_mode: only accept the exact name, without prefixes
        :return: true or false
        """
        if strict_mode:
            return hasattr(self, key)
        return any(hasattr(self, prefix + key) for prefix in self._attr_pre_fix_list)

    def get_attr(self, key, strict_mode=False):
        """
        Look up an attribute value.

        :param key: attribute name
        :param strict_mode: only accept the exact name, without prefixes
        :return: the value, or "" when nothing matches
        """
        if strict_mode:
            return getattr(self, key, "")
        for prefix in self._attr_pre_fix_list:
            if key.startswith("mac") and prefix == "aiv_":
                # e.g mac_ratio must match aic_mac_ratio, not aiv_mac_ratio
                continue
            if key.startswith("vec") and prefix == "aic_":
                # e.g vec_ratio must match aiv_vec_ratio, not aic_vec_ratio
                continue
            attr = prefix + key
            if hasattr(self, attr):
                return getattr(self, attr)
        return ""

    def get_float_attr(self, attr, strict_mode=False):
        """
        Look up an attribute and convert it to float.

        :param attr: attribute name
        :param strict_mode: only accept the exact name, without prefixes
        :return: the float value, or 0 when missing or not convertible
        """
        try:
            return float(self.get_attr(attr, strict_mode))
        except (ValueError, TypeError, FloatingPointError):
            # TypeError covers non-string values such as None.
            return 0

    def get_decimal_attr(self, attr, strict_mode=False):
        """
        Look up an attribute and convert it to Decimal.

        :param attr: attribute name
        :param strict_mode: only accept the exact name, without prefixes
        :return: the Decimal value, or Decimal(0) when missing or not convertible
        """
        try:
            return decimal.Decimal(self.get_attr(attr, strict_mode))
        except (ValueError, TypeError, decimal.InvalidOperation):
            return decimal.Decimal(0)

    def get_attrs(self) -> dict:
        """
        :return: the instance attribute dictionary itself (not a copy)
        """
        return self.__dict__
class OpInfo(Info):
    """
    One operator row of the op summary.
    """

    _attr_pre_fix_list = ["", "aic_", "aiv_"]
    _mac_ratio_attrs = ["mac_ratio", "mac_fp16_ratio", "mac_int8_ratio", "aic_mac_ratio"]
    _aicore_time_key = ["aicore_time", "aiv_time"]
    _total_cycles_key = ["total_cycles", "aic_total_cycles", "aiv_total_cycles"]

    def __lt__(self, other):
        # Operators are ordered by their start time.
        return self.get_float_attr("task_start_time") < other.get_float_attr("task_start_time")

    @lazy_property
    def is_cube_op(self) -> bool:
        """
        Tell whether any mac ratio attribute of this operator is positive.
        """
        ffts_type = getattr(self, "ffts_type", None)
        for ratio_attr in self._mac_ratio_attrs:
            if not hasattr(self, ratio_attr):
                continue
            try:
                ratio = float(getattr(self, ratio_attr))
            except ValueError:
                continue
            if ratio > 0:
                if ffts_type == "1":
                    logger.warning(
                        "ffts type of op %s is vector buf mac ratio is not 0", getattr(self, "op_name")
                    )
                return True
        # not cube op
        if ffts_type == "0":
            logger.warning("ffts type of op %s is cube but mac ratio is 0", getattr(self, "op_name"))
        return False

    @lazy_property
    def has_mac_ratio(self) -> bool:
        """
        Tell whether any mac ratio attribute is stored on this operator.
        """
        return any(ratio_attr in self.__dict__ for ratio_attr in self._mac_ratio_attrs)

    def attr_sum(self, attr_list):
        """Sum the float values of the exactly-named attributes in attr_list."""
        return sum(self.get_float_attr(name, strict_mode=True) for name in attr_list)

    def get_aicore_time(self):
        """
        get sum of aicore time and ai vector core time
        """
        return self.attr_sum(self._aicore_time_key)

    def get_total_cycles(self):
        """
        get sum of total cycle for aicore and ai vector core
        """
        return self.attr_sum(self._total_cycles_key)
class TaskInfo:
    """
    Read-only view of one timeline event dictionary.
    """
    EVENT_TYPE = {"metadata": ['M'], "duration": ['B', 'E'], "complete": ['X'], 'flow': ['s', 't', 'f']}

    def __init__(self, content: dict) -> None:
        fetch = content.get
        self._name = fetch("name", "")
        self._pid = fetch("pid", 0)
        self._tid = fetch("tid", 0)
        # Timestamp and duration are normalised to float.
        self._start_time = float(fetch("ts", 0.0))
        self._dur = float(fetch("dur", 0.0))
        self._args = fetch("args", {})
        self._cat = fetch("cat", "")
        self._id = fetch("id", "")

    @property
    def pk_id(self):
        """Value of the event's "id" field ("" when absent)."""
        return self._id

    @property
    def pid(self):
        """Process id of the event (0 when absent)."""
        return self._pid

    @property
    def tid(self):
        """Thread id of the event (0 when absent)."""
        return self._tid

    @property
    def task_type(self):
        """Value of args["Task Type"], or "NA" when absent."""
        return self._args.get("Task Type", "NA")

    @property
    def start_time(self):
        """Event timestamp ("ts") as float."""
        return self._start_time

    @property
    def end_time(self):
        """Start time plus duration."""
        return self._start_time + self._dur

    @property
    def dur(self):
        """Event duration ("dur") as float."""
        return self._dur

    @property
    def name(self):
        """Event name ("" when absent)."""
        return self._name

    @property
    def stream_id(self):
        """Value of args["Stream Id"], or "NA" when absent."""
        return self._args.get("Stream Id", "NA")

    @property
    def task_id(self):
        """Value of args["Task Id"], or "NA" when absent."""
        return self._args.get("Task Id", "NA")

    @property
    def args(self):
        """The event's args dictionary ({} when absent)."""
        return self._args

    @property
    def cat(self):
        """Event category ("" when absent)."""
        return self._cat
class ProfilingDataset(Dataset):
    """
    Dataset that loads profiling files according to a per-CANN-version layout.

    The layout comes from a YAML config (see ``parse_pattern``). The entry
    whose ``version`` equals ``cann_version`` is selected, and its
    ``dirs_pattern`` tree is walked to build one parser object per listed item.
    Each parser object is stored on this dataset as an attribute named after
    the item.
    """
    PROF_TYPE = ""

    def __init__(self, collection_path, data: dict, **kwargs) -> None:
        """
        :param collection_path: root path of the profiling data
        :param data: forwarded unchanged to the base ``Dataset``
        :param kwargs: optional ``cann_version`` and ``profiling_type``
        """
        self.cann_version = kwargs.get("cann_version", constant.DEFAULT_CANN_VERSION)
        self.PROF_TYPE = kwargs.get("profiling_type", constant.DEFAULT_PROFILING_TYPE)
        # The version pattern must be resolved before the base initializer runs.
        # NOTE(review): presumably Dataset.__init__ is what triggers _parse() - confirm.
        self.patterns = self.parse_pattern()
        self.current_version_pattern = self.get_current_version_pattern()
        super().__init__(collection_path, data)

    def _parse(self):
        """
        Parse device info and build the per-file parser objects.

        :return: True when a pattern for the current CANN version was found
                 and processed, otherwise False
        """
        info = DeviceInfoParser(self.collection_path)
        if info.parse_data():
            self._info = info

        pattern = self.current_version_pattern
        # get_current_version_pattern() returns an empty dict (not None) when no
        # version matches, so test for the key instead of comparing with None.
        if not pattern or "dirs_pattern" not in pattern:
            logger.warning("No profiling data pattern found for cann version %s", self.cann_version)
            return False

        self.build_from_pattern(pattern["dirs_pattern"], self.collection_path)
        return True

    def build_from_pattern(self, dirs_pattern, current_path):
        """
        Recursively walk ``dirs_pattern`` and build the parser objects.

        A dict maps a sub-directory name to a nested pattern; a list names the
        items to parse in ``current_path``. For each item the parser class name
        is read from ``class_attr`` and its file pattern from ``file_attr``.

        :param dirs_pattern: dict or list taken from the version config
        :param current_path: directory the pattern applies to
        """
        if isinstance(dirs_pattern, dict):
            for key, value in dirs_pattern.items():
                self.build_from_pattern(value, join_prof_path(current_path, key))
        elif isinstance(dirs_pattern, list):
            for item in dirs_pattern:
                # The parser class is looked up by name among this module's globals.
                data_class = globals()[self.current_version_pattern.get('class_attr').get(item)]
                # FILE_PATTERN is assigned on the class itself, not on the instance.
                data_class.FILE_PATTERN = self.current_version_pattern.get('file_attr').get(item)
                data_object = data_class(current_path)
                data_object.parse_data()
                setattr(self, item, data_object)
        else:
            logger.warning("Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__)

    def get_current_version_pattern(self):
        """
        Select the config entry whose ``version`` equals ``self.cann_version``.

        :return: the matching entry, or an empty dict when the config is
                 missing, malformed, or has no entry for this version
        """
        # parse_pattern() returns a list when the config file is absent, and
        # yaml.safe_load() may return None for an empty file.
        versions = self.patterns.get('versions') if isinstance(self.patterns, dict) else None
        for version_config_dict in versions or []:
            if version_config_dict.get('version') == self.cann_version:
                return version_config_dict
        return dict()

    def parse_pattern(self, config_path="config/profiling_data_version_config.yaml"):
        """
        Load the profiling data version config.

        :param config_path: absolute path, or a path relative to two
                            directories above this module
        :return: the parsed YAML content, or an empty list when the file does
                 not exist
        """
        if not os.path.isabs(config_path):
            config_path = os.path.join(os.path.dirname(__file__),
                                       "../", "../", config_path)

        if not os.path.exists(config_path):
            logger.warning("Skip parse profiling dataset, because %s does not exist.", config_path)
            return []

        with open(config_path, 'r', encoding="utf-8") as f:
            patterns = yaml.safe_load(f)

        return patterns

    # NOTE(review): this method shares its name with the `collection_path`
    # attribute that _parse() reads as a path. If the base class stores the path
    # as an instance attribute, that attribute shadows this method; otherwise
    # the expression below evaluates to the bound method itself. Left unchanged
    # because the base class is not visible here - confirm and remove or rename.
    def collection_path(self):
        """collection_path"""
        return self.collection_path
List[List[str]]): + if not csv_content: + logger.error("%s is empty", csv_file_name) + return False + return True + + def _parse_csv(self, file, check_csv=True) -> bool: + logger.debug("Parse file %s", file) + self._filename = os.path.splitext(os.path.basename(file))[0] + with SafeOpen(file, encoding="utf-8") as csv_file: + try: + csv_content = csv.reader(csv_file) + for row in csv_content: + self._raw_data.append(row) + if check_csv and not self._check_csv_file_format(file, self._raw_data): + logger.error("Invalid csv file : %s", file) + return False + except OSError as error: + logger.error("Read csv file failed : %s", error) + return False + + if not csv_file: + return False + if not self._raw_data: + logger.warning("File %s has no content", file) + return False + return True + + def _parse_json(self, file) -> bool: + logger.debug("Parse file %s", file) + self._filename = os.path.splitext(os.path.basename(file))[0] + try: + with open(file, encoding="utf-8") as json_file: + self._raw_data = json.load(json_file) + except (OSError, ValueError) as error: + logger.error("Parse json file %s failed : %s", file, error) + return False + return True + + def get_raw_data(self): + """ + get raw file name and data + """ + return self._filename, self._raw_data + + @staticmethod + def _get_csv_title(data: List, number=0, title_index=0): + """ + number = 0 replace (us) (ns).. 
+ other replace " " to "_" + title_index: position of title default 0 + """ + title_dict: Dict[int, str] = {} + for idx, title in enumerate(data[title_index]): + if number == 0: + title_dict[idx] = format_excel_title(title) + else: + title_dict[idx] = title.replace(" ", "_") + return title_dict + + @property + def path(self): + """ + path + """ + return self._path diff --git a/profiler/advisor/display/html/templates/main.html b/profiler/advisor/display/html/templates/main.html index f1703c7d8c..251961d79d 100644 --- a/profiler/advisor/display/html/templates/main.html +++ b/profiler/advisor/display/html/templates/main.html @@ -72,7 +72,7 @@ table { width: 100%; - table-layout: auto; + table-layout: fixed; border-collapse: collapse; margin-top: 2px; margin-bottom: 5px; @@ -82,7 +82,7 @@ padding: 10px; word-wrap: break-word; word-break: break-all; - white-space: nowrap; + white-space: normal; border: 1px solid rgb(170, 169, 169); text-align: left; } @@ -140,7 +140,7 @@ {% for key, renders in render_list.items() %} {% if key == 'operator'%}
-

Profiling Operator Issues

+

computation

{% for render in renders %} {{render|safe}} @@ -159,7 +159,7 @@ {% endif %} {% endfor %}
diff --git a/profiler/advisor/display/html/templates/operator_ai_cpu.html b/profiler/advisor/display/html/templates/operator_ai_cpu.html new file mode 100644 index 0000000000..b3235a8802 --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_ai_cpu.html @@ -0,0 +1,61 @@ +
+

AICPU Issues

+
+ + + + + + + + + + + + + +
DescriptionSuggestionElapsed Time(us)Time Ratio
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}{{ format_result.task_duration|safe }}{{ format_result.record.statistics_item.task_duration_ratio|safe }}
+
+ {% for op_type, op_info in format_result.statistic %} +
{{ op_type|safe }}
+
+ + + + + + + + + + + +
Operator TypeCountsElapsed Time(us)
{{ op_info.summary.op_type|safe }}{{ op_info.summary.counts|safe }}{{ op_info.summary.total_duration|safe }}
+
+ {% for trace_stack, info in op_info.op_info_list %} +
+ {{ info.summary.op_type|safe }} | Input DType:({{info.op_info_list[0].input_data_types|safe}}) | Output DType:({{info.op_info_list[0].output_data_types|safe}}) | Counts:{{ info.summary.counts|safe}} | Elapsed Time(us):{{ + info.summary.total_duration|safe}} +
+
+ {% if info.op_info_list[0].suggestions|length > 0 %} +
+ {% for suggestion in info.op_info_list[0].suggestions %} +

+ Suggestion {{ loop.index|safe }}: {{suggestion|safe}} +

+ {% endfor %} +
+ {% else %} +

Suggestion 1: Modify code to avoid AICPU operator

+ {% endif %} +
+ {{ info.op_info_list[0].stack_info|safe }} +
+ {% endfor %} +
+
+ {% endfor %} +
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_block_dim.html b/profiler/advisor/display/html/templates/operator_block_dim.html new file mode 100644 index 0000000000..4e2c832f62 --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_block_dim.html @@ -0,0 +1,38 @@ +
+

Block Dim Issues

+
+ + + + + + + + + + + + + +
DescriptionSuggestionElapsed Time(us)Time Ratio
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}{{ format_result.task_duration|safe }}{{ format_result.record.statistics_item.task_duration_ratio|safe }}
+
+ {% for op_type, op_info in format_result.statistic %} +
{{ op_type|safe }}
+
+ + + + + + + + + + + +
Operator TypeCountsElapsed Time(us)
{{ op_info.summary.op_type|safe }}{{ op_info.summary.counts|safe }}{{ op_info.summary.total_duration|safe }}
+
+ {% endfor %} +
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_dynamic_shape.html b/profiler/advisor/display/html/templates/operator_dynamic_shape.html new file mode 100644 index 0000000000..59920b6c9e --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_dynamic_shape.html @@ -0,0 +1,15 @@ +
+

Operator Dynamic Shape Issues

+
+ + + + + + + + + +
DescriptionSuggestion
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}
+
+
\ No newline at end of file diff --git a/profiler/advisor/display/html/templates/operator_no_bound.html b/profiler/advisor/display/html/templates/operator_no_bound.html new file mode 100644 index 0000000000..cfbd20baad --- /dev/null +++ b/profiler/advisor/display/html/templates/operator_no_bound.html @@ -0,0 +1,38 @@ +
+

Operator No Bound Issues

+
+ + + + + + + + + + + + + +
DescriptionSuggestionElapsed Time(us)Time Ratio
{{ format_result.record.optimization_item.description|safe }}{{ format_result.suggestion|safe }}{{ format_result.task_duration|safe }}{{ format_result.record.statistics_item.task_duration_ratio|safe }}
+
+ {% for op_type, op_info in format_result.statistic %} +
{{ op_type|safe }}
+
+ + + + + + + + + + + +
Operator TypeCountsElapsed Time(us)
{{ op_info.summary.op_type|safe }}{{ op_info.summary.counts|safe }}{{ op_info.summary.total_duration|safe }}
+
+ {% endfor %} +
+
+
\ No newline at end of file diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index c920ad4ea2..ebe20baa2d 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,6 +1,7 @@ from collections import OrderedDict import os +from profiler.advisor.analyzer.computation.profiling_analyzer import ProfilingAnalyzer from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer from profiler.advisor.common.analyzer_scopes import SupportedScopes from profiler.advisor.utils.utils import Timer @@ -13,12 +14,14 @@ class Interface: "schedule": OrderedDict({ SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer }), - "computation": OrderedDict(), + "computation": OrderedDict({ + SupportedScopes.PORFILING_OPERATOR_ANALYSIS: ProfilingAnalyzer + }), "communication": OrderedDict(), "overall": OrderedDict(), "dataloader": OrderedDict(), "cluster": OrderedDict({ - SupportedScopes.SKOW_RANK: SlowRankAnalyzer, + SupportedScopes.SLOW_RANK: SlowRankAnalyzer, SupportedScopes.SLOW_LINK: SlowLinkAnalyzer }) } diff --git a/profiler/advisor/rules/aicpu_rules.yaml b/profiler/advisor/rules/aicpu_rules.yaml new file mode 100644 index 0000000000..053f4150e8 --- /dev/null +++ b/profiler/advisor/rules/aicpu_rules.yaml @@ -0,0 +1,107 @@ +DataTypeSuggeation: &DataTypeSuggeation "Data type {} in {} operator may cause AICPU issues, Try to convert to {} if possible." 
+ +CommonChecker: + - DataTypeChecker: + cann_version: 7.0.RC1 + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.RC1 + op_type: [ cast ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.RC1 + op_type: [ tensorequal ] + input: [ float, float32, float16, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.RC1 + op_type: [ equal ] + input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.RC1 + op_type: [ nonzero ] + input: [ float16, bool, dt_bf16 ] + output: [ int64 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.RC1 + op_type: [ mul ] + input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.0 + op_type: [ __ALL__ ] + ignore_type: [ cast, tensorequal, equal, nonzero, mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.0 + op_type: [ cast ] + input: [ float, float32, 
float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.0 + op_type: [ tensorequal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.0 + op_type: [ equal ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] + output: [ bool ] + suggestion: *DataTypeSuggeation + + - DataTypeChecker: + cann_version: 7.0.0 + op_type: [ mul ] + input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] + suggestion: *DataTypeSuggeation + +ExampleGuideChecker: + - IndexPutChecker: + op_type: [index] + url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=Index" + suggestion: 'Please modify source code followed by this LINK, try to replace index operator with equivalent operator.' + + - NonzeroChecker: + op_type: [ indexput, indexputv2 ] + url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=IndexPut" + suggestion: 'Please modify source code followed by this LINK, try to replace indexput operator with equivalent operator.' + + - CastChecker: + op_type: [ argmin ] + url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=ArgMin" + suggestion: 'Please update your cann-tookit to at least 7.0.RC1 version by this LINK.' + + - CastChecker: + op_type: [ unique ] + url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=unique" + suggestion: 'Please modify source code followed by this LINK, try to replace unique operator with equivalent operator.' 
+ + - CastChecker: + op_type: [ nonzero ] + url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=unique" + suggestion: 'Please modify source code followed by this LINK, try to replace nonzero operator with equivalent operator.' \ No newline at end of file diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 1a4444f1ec..f338fc7dd4 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -70,9 +70,9 @@ def singleton(cls): :param cls: any class :return: singleton handle - When using the singleton function, you need to manually specify arg='dataSet_path'. Otherwise, the singleton function + When using the singleton function, you need to manually specify collection_path='dataSet_path'. Otherwise, the singleton function is initialized by class name. - if cls has 'arg' property, _instance map will build by class_name and 'arg', the default value of + if cls has 'collection_path' property, _instance map will build by class_name and 'collection_path', the default value of collection path is class absolute path. 
def run_command(cmd):
    """
    Run a command line and block until the process exits.

    The command string is split with ``shlex.split(cmd, posix=False)``, so
    quote characters stay part of the arguments.

    :param cmd: command line to execute
    :return: None
    """
    # Make sure the process output can be displayed on the console: the child
    # inherits this process's stdout. Piping stdout and then calling wait()
    # without reading the pipe can block forever once the pipe buffer fills.
    with Popen(shlex.split(cmd, posix=False), bufsize=0, universal_newlines=False) as p:
        p.wait()
class TestProfilingDataset(unittest.TestCase):
    """Checks the datasets loaded from the sample profiling result directory."""

    def setUp(self):
        base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
        self.data_root_dir = os.path.join(base_dir, "data/profiling_result",
                                          "cann700cnnTest_0001_20240113084145.001_ascend_pt")
        load_kwargs = {"analysis_mode": "profiling", "data_dir": self.data_root_dir}
        # NOTE(review): `Collector` is not among this file's imports, so this
        # line raises NameError as written - confirm the intended import.
        self.data_list = Collector().load(white_list=['timeline_event_dataset', 'profiling_dataset'],
                                          **load_kwargs)

    @classmethod
    def tearDownClass(cls) -> None:
        recover_env()

    def test_profiling_dataset_build(self):
        # check profiling data dir whether exists.
        self.assertTrue(os.path.exists(self.data_root_dir))
        self.assertTrue('profiling_dataset_base' in self.data_list)

        dataset = self.data_list['profiling_dataset_base'][0]
        self.assertTrue(dataset)

        # (dataset attribute, list attribute expected to be non-empty), in the
        # order: ge_info.db, op_summary, task_time, msprof.
        expected_contents = (
            ('ge_info', 'op_state_info_list'),
            ('op_summary', 'op_list'),
            ('task_time', '_tasks'),
            ('msprof', 'tasks'),
        )
        for attr_name, records_name in expected_contents:
            self.assertTrue(hasattr(dataset, attr_name))
            records = getattr(getattr(dataset, attr_name), records_name)
            self.assertTrue(len(records) > 0)

    def test_profiling_type(self):
        # check profiling type
        dataset = self.data_list['profiling_dataset_base'][0]
        self.assertTrue(hasattr(dataset, 'PROF_TYPE'))
        self.assertTrue(dataset.PROF_TYPE in constant.SUPPORTED_PROFILING_TYPE)
+ assert clazz.__contains__(AicpuChecker) + assert clazz.__contains__(DynamicShapeChecker) + assert clazz.__contains__(BlockDimChecker) + + @classmethod + def test_singleton(cls): + single1 = SingletonTest(collection_path="data_path_1", id='single1') + single2 = SingletonTest(collection_path="data_path_2", id='single2') + single3 = SingletonTest(collection_path="data_path_1", id='single3') + assert single1.id != single2.id + assert single1.id == single3.id + + + +if __name__ == '__main__': + TestProfilingAnalyzer.test_get_supported_subclass() + TestProfilingAnalyzer.test_singleton() \ No newline at end of file diff --git a/setup.py b/setup.py index 8ee18763b5..0c0306444b 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ extras = { with open('requirements/build.txt', 'r') as f: requires = f.read().splitlines() -with open('requirements/test.txt', 'r') as f: +with open('requirements/tests.txt', 'r') as f: tests_requires = f.read().splitlines() tests_requires.extend(set(requires)) @@ -24,7 +24,7 @@ with open('version.txt', 'r') as f: version = f.read().strip() setup( - name="att-advisor", + name="msprof-analyze", version=version, description="Ascend advisor tools", packages=find_packages(), @@ -35,7 +35,7 @@ setup( tests_require=tests_requires, entry_points=""" [console_scripts] - att-advisor=profiler.cli.entrance:advisor_cli + msprof-analyze=profiler.cli.entrance:advisor_cli """ ) -- Gitee From 33118278a0e2ae381a41aafb4f4ed8bcdcf84d53 Mon Sep 17 00:00:00 2001 From: fanxiaotong Date: Wed, 15 May 2024 11:03:05 +0800 Subject: [PATCH 12/21] framework --- .../computation/npu_fused/__init__.py | 14 + .../npu_fused/compute_advice_base.py | 118 +++++++++ .../computation/npu_fused/csv_analyzer.py | 81 ++++++ .../computation/npu_fused/json_analyzer.py | 55 ++++ .../computation/npu_fused/npu_fused_advice.py | 113 ++++++++ .../computation/npu_fused/npu_slow_advice.py | 124 +++++++++ .../analyzer/computation/npu_fused/op_perf.py | 193 ++++++++++++++ 
.../overall/overall_summary_analyzer.py | 246 ++++++++++++++++++ profiler/advisor/common/constant.py | 229 ++++++++++++++++ profiler/advisor/common/trace_view_json.py | 209 +++++++++++++++ .../advisor/common/trace_view_preprocessor.py | 208 +++++++++++++++ profiler/advisor/interface/interface.py | 5 +- profiler/cluster_analyse/cluster_analysis.py | 14 +- .../common_func/file_manager.py | 4 +- .../compute_advice/test_npu_slow_advice.py | 12 +- 15 files changed, 1608 insertions(+), 17 deletions(-) create mode 100644 profiler/advisor/analyzer/computation/npu_fused/__init__.py create mode 100644 profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py create mode 100644 profiler/advisor/analyzer/computation/npu_fused/csv_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/npu_fused/json_analyzer.py create mode 100644 profiler/advisor/analyzer/computation/npu_fused/npu_fused_advice.py create mode 100644 profiler/advisor/analyzer/computation/npu_fused/npu_slow_advice.py create mode 100644 profiler/advisor/analyzer/computation/npu_fused/op_perf.py create mode 100644 profiler/advisor/analyzer/overall/overall_summary_analyzer.py create mode 100644 profiler/advisor/common/trace_view_json.py create mode 100644 profiler/advisor/common/trace_view_preprocessor.py diff --git a/profiler/advisor/analyzer/computation/npu_fused/__init__.py b/profiler/advisor/analyzer/computation/npu_fused/__init__.py new file mode 100644 index 0000000000..8400fd5ecd --- /dev/null +++ b/profiler/advisor/analyzer/computation/npu_fused/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py b/profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py new file mode 100644 index 0000000000..3916de201b --- /dev/null +++ b/profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py @@ -0,0 +1,118 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class ComputeAdviceBase(BaseAnalyzer):
    """
    Base class for compute advice built on ``kernel_details.csv``.

    ``path_check`` resolves ``kernel_details_path`` from ``collection_path``,
    and ``has_callstack`` reports whether call stack data was collected.
    Subclasses implement ``run``.
    """
    ASCEND_PT = 'ascend_pt'
    ASCEND_PROFILER_OUTPUT = 'ASCEND_PROFILER_OUTPUT'
    KERNEL_DETAIL_FILE = "kernel_details.csv"
    TRACE_VIEW_FILE = "trace_view.json"

    def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION,
                 torch_version=const.DEFAULT_TORCH_VERSION, **kwargs):
        super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs)
        self.kernel_details_path = ""
        self.has_preparse = False
        self.preparse_data = defaultdict(list)
        # None means "not determined yet"; has_callstack() caches its result here.
        self.call_stack = None
        self.trace_view_path = ""

    def path_check(self):
        """
        check whether input path is valid

        Accepts either a directory whose name ends with ``ascend_pt`` and that
        contains ``ASCEND_PROFILER_OUTPUT/kernel_details.csv``, or a direct
        path to a ``kernel_details.csv`` file.

        :return: True when ``kernel_details_path`` was resolved, else False
        """
        if not os.path.exists(self.collection_path):
            print("[ERROR] Path: {} is not exist.".format(self.collection_path))
            return False
        # normpath drops a trailing separator, so ".../xxx_ascend_pt/" is accepted too.
        is_ascend_pt_dir = os.path.isdir(self.collection_path) and \
            os.path.normpath(self.collection_path).endswith(self.ASCEND_PT)
        if is_ascend_pt_dir:
            output_dir = os.path.join(self.collection_path, self.ASCEND_PROFILER_OUTPUT)
            self.kernel_details_path = os.path.join(output_dir, self.KERNEL_DETAIL_FILE)
            if not os.path.exists(self.kernel_details_path):
                print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format(output_dir))
                return False
        elif os.path.isfile(self.collection_path) and \
                os.path.basename(self.collection_path) == self.KERNEL_DETAIL_FILE:
            self.kernel_details_path = self.collection_path
        else:
            print("[ERROR] Please input ascend_pt or kernel_details.csv")
            return False
        print("[INFO] Start to analyse the target file: {}".format(self.kernel_details_path))
        self.preparse()
        return True

    def has_callstack(self):
        """
        Tell whether call stack data is available; the result is cached.

        :return: False when unavailable, otherwise the ``with_stack`` value
                 read from the profiler info json
        """
        if self.call_stack is None:
            self.call_stack = self._detect_callstack()
        return self.call_stack

    def _detect_callstack(self):
        """Inspect the profiler info json and trace view file for call stack support."""
        # path_check() also accepts a kernel_details.csv file path, and
        # os.listdir() would raise on a non-directory.
        if not os.path.isdir(self.collection_path):
            return False
        profiler_info_json_path = ""
        for file in os.listdir(self.collection_path):
            if file.startswith("profiler_info"):
                profiler_info_json_path = os.path.join(self.collection_path, file)
                break
        if not profiler_info_json_path:
            return False
        self.trace_view_path = os.path.join(self.collection_path, self.ASCEND_PROFILER_OUTPUT,
                                            self.TRACE_VIEW_FILE)
        if not os.path.exists(profiler_info_json_path) or not os.path.exists(self.trace_view_path):
            return False
        info = FileManager.read_json_file(profiler_info_json_path)
        common_config = (info.get("config") or {}).get("common_config") or {}
        with_stack = common_config.get("with_stack")
        if not with_stack:
            return False
        activities = common_config.get("activities")
        if not activities or "ProfilerActivity.CPU" not in activities:
            return False
        return with_stack

    @abstractmethod
    def run(self):
        """
        analyze profiling data and advice
        """

    def output(self):
        """
        output relevant data
        """
        self.output_format_data[self.DATA] = self.cur_data
        self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck
        self.output_format_data[self.ADVICE] = self.cur_advice

    def preparse(self):
        """Hook called by path_check(); this base implementation does nothing."""
        if self.has_preparse:
            return

    def optimize(self):
        """No-op in this base class."""
        pass

    def make_record(self):
        """
        make record for what and how to optimize
        """
        pass

    def make_render(self):
        """No-op in this base class."""
        pass
class CSVAnalyzer:
    """Search a ``kernel_details.csv`` file for operator sequences that can be fused."""

    RESULT_COLUMNS = ["pattern_name", "pattern", "len", "count", "duration sum(us)", "op durations(us)",
                      "index", "first_timestamp"]

    def __init__(self, path, pattern_dict=None) -> None:
        """
        :param path: path of the ``kernel_details.csv`` file to analyse.
        :param pattern_dict: optional mapping ``{tuple_of_op_types: fused_op_name}``;
            ``Constant.PATTERN_DICT`` is used when it is None.
        """
        self._path = path
        self._pattern_dict = pattern_dict

    def process(self):
        """Return a DataFrame with one row per configured pattern.

        Columns are ``RESULT_COLUMNS``. A pattern that never occurs still gets
        a row, with zeros in every numeric column.
        """
        df = pd.read_csv(self._path, dtype={"Start Time(us)": str})
        # Look for fusable operator sequences.
        op_type_list = df["Type"].tolist()
        duration_list = df["Duration(us)"].tolist()
        # Drop the trailing "\t" separator only when it is really there;
        # blindly cutting the last character would eat a digit otherwise.
        start_times = [str(start_time).rstrip("\t") for start_time in df["Start Time(us)"].tolist()]
        pattern_dict = Constant.PATTERN_DICT if self._pattern_dict is None else self._pattern_dict
        result_list = []
        for pattern in pattern_dict:
            result_list.extend(
                self.find_all_sub_lists(op_type_list, duration_list, start_times, pattern, pattern_dict))
        # Passing the column names to the constructor also works for an empty result.
        return pd.DataFrame(result_list, columns=self.RESULT_COLUMNS)

    @staticmethod
    def find_all_sub_lists(op_type_list, duration_list, start_times, expect_sub_list, pattern_dict=None):
        """Collect every occurrence of ``expect_sub_list`` inside ``op_type_list``.

        :param op_type_list: operator types in execution order.
        :param duration_list: operator durations, parallel to ``op_type_list``.
        :param start_times: operator start times, parallel to ``op_type_list``.
        :param expect_sub_list: the operator-type sequence to look for.
        :param pattern_dict: optional ``{pattern_tuple: name}`` mapping used to
            name the pattern; ``Constant.PATTERN_DICT`` when None.
        :return: a one-element list holding
            ``[pattern_name, pattern, len, count, duration_sum, op_durations, indexes, first_start_time]``;
            all fields after the pattern are 0 when nothing matched.
        """
        if pattern_dict is None:
            pattern_dict = Constant.PATTERN_DICT
        expected = tuple(expect_sub_list)
        width = len(expected)
        pattern_name = pattern_dict.get(expected, "unknown")
        no_match = [[pattern_name, expected, 0, 0, 0, 0, 0, 0]]
        if width == 0:
            # An empty pattern cannot describe a fusable sequence.
            return no_match

        count = 0
        indexes = []
        duration_sum = 0
        op_durations = [0] * width
        first_time = None
        # Slide a window of the pattern length over the operator list.
        for i in range(len(op_type_list) - width + 1):
            if tuple(op_type_list[i:i + width]) != expected:
                continue
            window = duration_list[i:i + width]
            if count == 0:
                first_time = start_times[i]
            count += 1
            indexes.append(i)
            duration_sum += sum(window)
            # Accumulate the duration of each position of the pattern separately.
            op_durations = [total + current for total, current in zip(op_durations, window)]

        if count == 0:
            return no_match
        op_durations = [round(num, 2) for num in op_durations]
        return [[pattern_name, expected, width, count, duration_sum, op_durations, indexes, first_time]]
class JSONAnalyzer(object):
    """Look up, in a ``trace_view.json`` file, the python call stack that launched a kernel."""

    def __init__(self, path):
        """
        :param path: path of the trace view json file.
        """
        self._path = path

    @staticmethod
    def _build_callstack(python_dur_events):
        """Turn the python duration events that contain a timestamp into call-stack text."""
        # Stay compatible with both old and new call-stack layouts.
        if python_dur_events[0].args.get("Call stack"):
            # Old layout: the whole stack is stored in one ";"-separated argument.
            callstack = python_dur_events[0].args.get("Call stack").split(";")
        else:
            # New layout: one python_function event per frame, ordered by start time.
            python_dur_events.sort(key=lambda e: e.ts)
            callstack = [event.name for event in python_dur_events if event.cat == "python_function"]
        return "\n".join(callstack)

    def get_custom_code(self, data: pd.DataFrame, ts_col: str, output_col: str):
        """Return a one-column DataFrame with the call stack for every row of ``data``.

        :param data: rows to resolve; ``data[ts_col]`` holds the kernel timestamp.
        :param ts_col: name of the timestamp column in ``data``.
        :param output_col: name of the single column of the returned DataFrame.
        :return: DataFrame indexed like ``data``; a row is "" when no call stack
            could be found. It is empty when ``ts_col`` is missing from ``data``.
        """
        callstacks = pd.DataFrame(columns=[output_col])
        # Validate the input once, before the trace file is loaded.
        if ts_col not in data.columns:
            print("[ERROR] No {} col found in data columns.".format(ts_col))
            return callstacks

        trace_json = TraceViewJson(self._path)
        for i, row in data.iterrows():
            flow_event = trace_json.get_torch_2_npu_flow_event(row[ts_col])
            if not flow_event.valid():
                print("[ERROR] Get flow event failed for pattern {}.".format(row.get('pattern')))
                callstacks.loc[i] = ""
                continue
            python_dur_events = trace_json.get_python_dur_events_contain_ts(flow_event.s_point_ts)
            if not python_dur_events:
                print("[ERROR] No python dur event found for pattern {}.".format(row.get('pattern')))
                callstacks.loc[i] = ""
                continue
            callstacks.loc[i] = self._build_callstack(python_dur_events)
        return callstacks
class NpuFusedAdvice(ComputeAdviceBase, ABC):
    """Advise which operator sequences found in ``kernel_details.csv`` can be replaced by fused NPU operators."""
    NPU_FUSED_ADVICE = "npu_fused_advice"

    def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION,
                 torch_version=const.DEFAULT_TORCH_VERSION, **kwargs):
        super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs)
        self.cur_data = dict()
        self.cur_bottleneck = str()
        self.cur_advice = str()
        self.kernel_details_path = ""
        self.call_stack = None

    def run(self):
        """Analyse the profiling data and return ``output_format_data``.

        When the path check fails ``output_format_data`` is returned untouched.
        """
        if not self.path_check():
            return self.output_format_data
        self.process()
        self.output()
        return self.output_format_data

    def process(self):
        """Fill ``cur_data``, ``cur_bottleneck`` and ``cur_advice`` from the kernel details file."""
        csv_analyzer = CSVAnalyzer(self.kernel_details_path)
        all_pattern_data = csv_analyzer.process()
        all_pattern_data = all_pattern_data.sort_values(by='duration sum(us)', ascending=False)
        # Keep only the patterns that really occurred.
        filter_data = all_pattern_data[all_pattern_data["duration sum(us)"] > 0]
        if not self.has_callstack():
            print("[Warning] No call stack info found, advice will be incomplete")
            self.cur_data = filter_data
        else:
            json_analyzer = JSONAnalyzer(self.trace_view_path)
            custom_code = json_analyzer.get_custom_code(filter_data, "first_timestamp", "custom code")
            self.cur_data = pd.concat([filter_data, custom_code], axis=1)
        op_num = len(self.cur_data.index)
        op_dur = filter_data["duration sum(us)"].sum()
        if op_num <= 0:
            return
        # The summed column is "duration sum(us)", so the unit is microseconds.
        self.cur_bottleneck = f"The computing time of fusable op is {round(op_dur, 2)} us."
        advices = []
        for index, (_, row) in enumerate(self.cur_data.iterrows()):
            cur_op = "[" + ", ".join(row.loc["pattern"]) + "]"
            npu_fused_op = row.loc["pattern_name"]
            advice = f"Advice {index}:\nReplace {cur_op} with {npu_fused_op}. "
            if self.call_stack:
                advice += f"This pattern first happened in: \n{row['custom code']}"
            advices.append(advice)
        self.cur_advice = "\n".join(advices)

    def optimize(self):
        """Same analysis as ``run()``; returns ``output_format_data``."""
        return self.run()

    def make_record(self):
        """
        make record for what and how to optimize
        """
        # Use the attributes process() fills (cur_bottleneck / cur_advice),
        # the same ones make_render() shows.
        # NOTE(review): self.result is not created in this class or in
        # ComputeAdviceBase; presumably BaseAnalyzer provides it - confirm.
        optimization_item = OptimizeItem(
            NpuFusedAdvice.NPU_FUSED_ADVICE,
            self.cur_bottleneck,
            self.cur_advice
        )
        self.result.add(OptimizeRecord(optimization_item))

    def make_render(self):
        """Render the bottleneck and the advice into the html report."""
        # NOTE(review): key and template are the cluster ones although this is
        # a computation advisor - confirm this is intended.
        result_for_html = {
            "Description": self.cur_bottleneck,
            "suggestion": self.cur_advice,
            "details": [{}]
        }

        self.html_render.render_template(key="cluster",
                                         title=NpuFusedAdvice.NPU_FUSED_ADVICE,
                                         template_dir="templates",
                                         template_name="cluster_analysis.html",
                                         cann_version=self.cann_version,
                                         torch_version=self.torch_version,
                                         result=result_for_html)
class NpuSlowAnalyzer(ComputeAdviceBase, ABC):
    """Grade every operator of ``kernel_details.csv`` by size, throughput and a performance color."""
    OP_PERF_SHEET = "op_perf"
    npu_slow_advice = "NPU_SLOW_ADVICE"

    def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION,
                 torch_version=const.DEFAULT_TORCH_VERSION, **kwargs):
        super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs)
        self.kernel_details_path = ""
        self.data = pd.DataFrame()
        # Read by make_record() / make_render(); nothing in this class fills them yet.
        self.cur_bottleneck = ""
        self.cur_advice = ""

    @staticmethod
    def save_to_excel(data: pd.DataFrame, file_path: str) -> None:
        """Write ``data`` to ``file_path`` as a colored sheet with a frozen header row."""
        # The context manager closes the workbook even when writing fails.
        with pd.ExcelWriter(file_path, engine="xlsxwriter", mode="w") as writer:
            data.index.name = Constant.TITLE.INDEX
            data.to_excel(writer, index=True, sheet_name=NpuSlowAnalyzer.OP_PERF_SHEET)
            worksheet = writer.sheets[NpuSlowAnalyzer.OP_PERF_SHEET]
            NpuSlowAnalyzer.color_sheet(data, writer.book, worksheet)
            # xlsxwriter exposes freeze_panes as a method; (1, 0) is cell "A2".
            worksheet.freeze_panes(1, 0)

    @staticmethod
    def color_sheet(data: pd.DataFrame, workbook, worksheet):
        """Fill every sheet row with the background that matches its color column."""
        color_rgb = {
            PerfColor.GREEN.name: workbook.add_format({'bg_color': '#C6EFCE'}),
            PerfColor.YELLOW.name: workbook.add_format({'bg_color': '#FFEB9C'}),
            PerfColor.RED.name: workbook.add_format({'bg_color': '#FFC7CE'}),
        }
        for row_index, row in data.iterrows():
            fill_format = color_rgb.get(row[Constant.TITLE.COLOR])
            if not fill_format:
                continue
            # +1 skips the header row of the sheet.
            worksheet.set_row(row_index + 1, None, fill_format)

    @staticmethod
    def update_op_row(row: tuple):
        """Grade one ``(index, row)`` pair produced by ``DataFrame.iterrows()``."""
        return OpPerfFactory.build(row[1]).update()

    def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str:
        """Return the call stack of row ``index_id`` of ``data``, or "" when none was recorded."""
        if not self.has_callstack():
            print("There is no call stack info, please set 'with_stack=True'")
            return ""
        trace_json = TraceViewJson(self.trace_view_path)
        return trace_json.get_call_stack(data, index_id, ts_col)

    def run(self):
        """Grade all operators and return the resulting DataFrame.

        When the path check fails the current (initially empty) data is returned.
        """
        if not self.path_check():
            return self.data
        self.process()
        return self.data

    def process(self):
        """Read the kernel details file and grade every operator row in a process pool."""
        self.data = pd.read_csv(self.kernel_details_path, dtype={"Start Time(us)": str})
        # Drop the trailing "\t" separator only when it is really present.
        self.data["Start Time(us)"] = self.data["Start Time(us)"].str.rstrip("\t")
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            result = pool.map(self.update_op_row, self.data.iterrows())
        self.data = pd.DataFrame(result)

    def optimize(self):
        """Same analysis as ``run()``; returns the graded DataFrame."""
        return self.run()

    def make_record(self):
        """
        make record for what and how to optimize
        """
        # NOTE(review): self.result is not created in this class or in
        # ComputeAdviceBase; presumably BaseAnalyzer provides it - confirm.
        optimization_item = OptimizeItem(
            NpuSlowAnalyzer.npu_slow_advice,
            self.cur_bottleneck,
            self.cur_advice
        )
        self.result.add(OptimizeRecord(optimization_item))

    def make_render(self):
        """Render the bottleneck and the advice into the html report."""
        result_for_html = {
            "Description": self.cur_bottleneck,
            "suggestion": self.cur_advice,
            "details": [{}]
        }

        self.html_render.render_template(key="cluster",
                                         title=NpuSlowAnalyzer.npu_slow_advice,
                                         template_dir="templates",
                                         template_name="cluster_analysis.html",
                                         cann_version=self.cann_version,
                                         torch_version=self.torch_version,
                                         result=result_for_html)
class OpPerfFactory:
    """Pick the performance grader that matches the core type of an operator row."""

    @classmethod
    def build(cls, op_row: Dict):
        """Return a ``VecOpPerf``, ``CubeOpPerf`` or plain ``OpPerf`` for ``op_row``."""
        task_type = op_row.get(Constant.TITLE.TASK_TYPE)
        if task_type == CoreType.AIV:
            return VecOpPerf(op_row)
        if task_type == CoreType.AIC:
            return CubeOpPerf(op_row)
        return OpPerf(op_row)


class OpPerf:
    """One operator row of ``kernel_details.csv`` plus the metrics derived from it."""

    def __init__(self, op_row: Dict):
        # An "OP Type" column marks the old csv layout.
        # NOTE(review): the lookups below always use the new-layout column names
        # ("Name", "Type", "Accelerator Core", "Duration(us)"); confirm whether
        # old-layout files are expected to fill these fields.
        if "OP Type" in op_row.keys():
            Constant.update_title()
        self.row = op_row
        self.model_name = op_row.get("Model Name")
        self.model_id = op_row.get("Model ID")
        self.task_id = op_row.get("Task ID")
        self.stream_id = op_row.get("Stream ID")
        self.infer_id = op_row.get("Infer ID")
        self.op_name = op_row.get("Name")
        self.op_type = op_row.get("Type")
        self.task_type = op_row.get("Accelerator Core")
        self.task_start_time = op_row.get("Start Time(us)")
        self.task_duration = op_row.get("Duration(us)")
        self.task_wait_time = op_row.get("Wait Time(us)")
        self.block_dim = op_row.get("Block Dim")
        self.mix_block_dim = op_row.get("Mix Block Dim")

        self.hf32_eligible = op_row.get("HF32 Eligible")
        self.input_shapes = op_row.get("Input Shapes")
        self.input_data_types = op_row.get("Input Data Types")
        self.input_formats = op_row.get("Input Formats")
        self.output_shapes = op_row.get("Output Shapes")
        self.output_data_types = op_row.get("Output Data Types")
        self.output_formats = op_row.get("Output Formats")
        self.context_id = op_row.get("Context ID")
        self.aicore_time = op_row.get("aicore_time(us)")
        self.aic_total_cycles = op_row.get("aic_total_cycles")

        self.aic_mac_time = op_row.get("aic_mac_time(us)")
        self.aic_mac_ratio = op_row.get("aic_mac_ratio")
        self.aic_scalar_time = op_row.get("aic_scalar_time(us)")
        self.aic_scalar_ratio = op_row.get("aic_scalar_ratio")
        self.aic_mte1_time = op_row.get("aic_mte1_time(us)")
        self.aic_mte1_ratio = op_row.get("aic_mte1_ratio")
        self.aic_mte2_time = op_row.get("aic_mte2_time(us)")
        self.aic_mte2_ratio = op_row.get("aic_mte2_ratio")
        self.aic_fixpipe_time = op_row.get("aic_fixpipe_time(us)")
        self.aic_fixpipe_ratio = op_row.get("aic_fixpipe_ratio")
        self.aic_icache_miss_rate = op_row.get("aic_icache_miss_rate")
        self.aiv_time = op_row.get("aiv_time(us)")
        self.aiv_total_cycles = op_row.get("aiv_total_cycles")
        self.aiv_vec_time = op_row.get("aiv_vec_time(us)")
        self.aiv_vec_ratio = op_row.get("aiv_vec_ratio")
        self.aiv_scalar_time = op_row.get("aiv_scalar_time(us)")
        self.aiv_scalar_ratio = op_row.get("aiv_scalar_ratio")
        self.aiv_mte2_time = op_row.get("aiv_mte2_time(us)")

        self.aiv_mte2_ratio = op_row.get("aiv_mte2_ratio")
        self.aiv_mte3_time = op_row.get("aiv_mte3_time(us)")
        self.aiv_mte3_ratio = op_row.get("aiv_mte3_ratio")
        self.aiv_icache_miss_rate = op_row.get("aiv_icache_miss_rate")
        self.cube_utilization = op_row.get("cube_utilization( %)")

    @staticmethod
    def get_dtype_size(dtype_str: str):
        """Return the byte size of ``dtype_str`` (case-insensitive), 0 when unknown."""
        return Constant.DTYPE_SIZE_MAP.get(dtype_str.lower(), 0)

    @staticmethod
    def get_element_count(shape: list):
        """Return the product of the dimensions of ``shape``; 1 for an empty shape."""
        # The initial value keeps reduce() from raising on an empty shape.
        return functools.reduce(lambda x, y: int(x) * int(y), shape, 1)

    @staticmethod
    def shape_to_tuple(shape_str: str) -> tuple:
        """Parse ``"1,2;3"`` into ``((1, 2), (3,))``.

        Shapes are separated by ";" and dimensions by ","; an empty dimension
        counts as 0. Anything that is not a non-empty string gives ``()``.
        """
        if not isinstance(shape_str, str):
            return ()
        split_shape = shape_str.strip('"').strip(';')
        if not split_shape:
            return ()
        return tuple(
            tuple(int(element) if "" != element else 0 for element in pair.split(','))
            for pair in split_shape.split(';')
        )

    @staticmethod
    def dtype_to_tuple(dtypes_str: str) -> tuple:
        """Parse ``"FLOAT;INT32"`` into ``("FLOAT", "INT32")``; ``()`` for non-string or empty input."""
        if not isinstance(dtypes_str, str):
            return ()
        split_dtypes = dtypes_str.strip('"').strip(';')
        if not split_dtypes:
            return ()
        return tuple(split_dtypes.split(';'))

    def get_mac_ratio(self):
        """Return the ``aic_mac_ratio`` value of the row."""
        return self.aic_mac_ratio

    def get_size(self, shapes_str, dtypes_str):
        """Return the total byte size of the tensors described by the two strings.

        Returns 0 (after printing an error) when there are more shapes than dtypes.
        """
        shapes = self.shape_to_tuple(shapes_str)
        dtypes = self.dtype_to_tuple(dtypes_str)
        if len(shapes) > len(dtypes):
            print(f"[ERROR] The size of shape is greater than that of dtypes.")
            return 0
        if len(shapes) < len(dtypes):
            # A dtype without a shape is counted as a single element.
            shapes = list(shapes)
            shapes.extend([(1,)] * (len(dtypes) - len(shapes)))
        all_size = 0
        for index, shape in enumerate(shapes):
            element_count = self.get_element_count(shape)
            dtype_size = self.get_dtype_size(dtypes[index])
            all_size += element_count * dtype_size
        return all_size

    def get_calc_size(self):
        """Return the input plus output size in MB, 0 when tensor data is missing."""
        # input and output bytes (MB)
        if not self.input_shapes or not self.output_shapes:
            print("[ERROR] There is no tensor data, do not assess vector op performance.")
            return 0
        input_size = self.get_size(self.input_shapes, self.input_data_types)
        output_size = self.get_size(self.output_shapes, self.output_data_types)
        return (input_size + output_size) / (Constant.BYTE_UNIT_TRANS * Constant.BYTE_UNIT_TRANS)

    def get_throughput(self):
        """Return the throughput in GB/s, 0 when the duration is missing or near zero.

        Reads the size stored in the row by ``update()``.
        """
        # throughput(GB/s)
        if not self.task_duration or abs(self.task_duration) < 1e-6:
            print("[ERROR] There is no task_duration, do not assess vector op performance.")
            return 0
        size_gb = self.row[Constant.TITLE.SIZE] / Constant.BYTE_UNIT_TRANS
        return size_gb / self.task_duration * Constant.UNIT_TRANS * Constant.UNIT_TRANS

    def get_perf_color(self):
        """Return the grade of the operator; the base class does not assess and gives WHITE."""
        return PerfColor.WHITE

    def update(self):
        """Store size, throughput and color in the row and return the row."""
        # Order matters: throughput reads the size, and the color of a vector
        # op reads the throughput.
        self.row[Constant.TITLE.SIZE] = self.get_calc_size()
        self.row[Constant.TITLE.THROUGHPUT] = self.get_throughput()
        self.row[Constant.TITLE.COLOR] = self.get_perf_color().name
        return self.row


class VecOpPerf(OpPerf):
    """Grader of vector-core operators, based on throughput."""

    def get_perf_color(self) -> "PerfColor":
        """Grade by comparing the throughput with ``Constant.TP_THRESHOLD``."""
        throughput = self.row[Constant.TITLE.THROUGHPUT]
        op_duration = self.task_duration
        tp_threshold = Constant.TP_THRESHOLD
        if throughput == 0:
            return PerfColor.WHITE
        if throughput < tp_threshold / 2 and op_duration > 20:
            return PerfColor.RED
        if tp_threshold / 2 <= throughput < tp_threshold:
            return PerfColor.YELLOW
        return PerfColor.GREEN


class CubeOpPerf(OpPerf):
    """Grader of cube-core operators, based on the mac ratio."""

    def get_perf_color(self) -> "PerfColor":
        """Grade by ``aic_mac_ratio``: below 0.6 RED, below 0.8 YELLOW, otherwise GREEN."""
        aic_mac_ratio = self.get_mac_ratio()
        # "x != x" is only true for NaN, which is how a missing csv cell shows
        # up in a pandas row; without this check NaN would be graded GREEN.
        if not aic_mac_ratio or aic_mac_ratio != aic_mac_ratio:
            print("[WARNING] There is no aic_mac_ratio, do not assess cube op performance.")
            return PerfColor.WHITE
        if aic_mac_ratio < 0.6:
            return PerfColor.RED
        if aic_mac_ratio < 0.8:
            return PerfColor.YELLOW
        return PerfColor.GREEN
class OverallSummaryAnalyzer(BaseAnalyzer):
    """Summarise where the end-to-end time goes (computing / communication / free).

    When ``base_collection_path`` is given, the result is also compared with
    that baseline.
    """
    OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis"
    advice_map = {
        "Computing Time": "if you want more detailed advice please go to compute_perf_analysis.ipynb.",
        "Uncovered Communication Time": "if you want more detailed advice please go to cluster_perf_analysis.ipynb.",
        "Free Time": "if you want more detailed advice please go to timeline_perf_analysis.ipynb."
    }
    time_name_map = {
        "Computing Time": "computing",
        "Uncovered Communication Time(Wait Time)": "communication",
        "Free Time": "free",
        'Cube Time(Num)': 'Cube Time',
        'Vector Time(Num)': 'Vector Time',
        'Flash Attention Time(Forward)(Num)': 'Flash Attention Time(Forward)',
        'Flash Attention Time(Backward)(Num)': 'Flash Attention Time(Backward)',
        'Other Time': "Other Computing Time",
        'SDMA Time(Num)': 'SDMA Time'
    }
    performance_time_dict = {
        "Computing Time": ['Cube Time(Num)', 'Vector Time(Num)', 'Flash Attention Time(Forward)(Num)',
                           'Flash Attention Time(Backward)(Num)', 'Other Time'],
        "Uncovered Communication Time(Wait Time)": [],
        "Free Time": ['SDMA Time(Num)']
    }

    def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION,
                 torch_version=const.DEFAULT_TORCH_VERSION, **kwargs):
        super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs)
        self.base_collection_path = kwargs.get("base_collection_path", "")
        self._has_base_collection = False
        self._is_minimal_profiling = False
        self.cur_data = {}
        self.cur_data_table = {}
        self.cur_bottleneck = {}
        self.cur_advices = ""
        self._headers = []
        self._base_data = []
        self._comparison_data = []
        self.html_render = HTMLRender()
        self.result = OptimizeResult()
        self.bottleneck_str = ""
        self.bottleneck_table = {}

    @staticmethod
    def split_duration_and_num(time_value: str) -> tuple:
        """Split ``"0.229s(1756)"`` into ``(0.229, 1756)``.

        The number is None when it is absent or not an integer; the duration is
        0.0 (with a warning) when it cannot be parsed.
        """
        split_data = str(time_value).split("s")  # time value example: 0.229s(1756)
        duration, num = 0.0, None
        if len(split_data) >= 2:
            try:
                num = int(split_data[1].strip("()"))
            except ValueError:
                pass
        if len(split_data) >= 1:
            try:
                duration = float(split_data[0])
            except ValueError:
                print(f"[WARNING] Invalid time value: {time_value}.")
        return duration, num

    @staticmethod
    def calculate_ratio(dividend, divisor):
        """Return ``dividend / divisor``, or infinity when the divisor is zero or missing."""
        if not divisor:
            return float("inf")
        return dividend / divisor

    def path_check(self):
        """Return True when ``collection_path`` exists; also records whether the baseline path exists."""
        if self.base_collection_path:
            if os.path.exists(self.base_collection_path):
                self._has_base_collection = True
            else:
                print(f"[WARNING] Invalid path which not exists: {self.base_collection_path}.")
        return os.path.exists(self.collection_path)

    def process(self):
        """Run the overall comparison and fill ``cur_data`` with the time of every category."""
        base_collection_path = self.base_collection_path if self._has_base_collection else self.collection_path
        result_data = ComparisonInterface(base_collection_path, self.collection_path).compare(Constant.OVERALL_COMPARE)
        for data in result_data.values():
            self._headers = data.get("headers", [])
            rows = data.get("rows", [])
            if len(rows) == 2:
                self._base_data = rows[0]
                self._comparison_data = rows[1]
        if not self._headers or not self._comparison_data:
            return
        self._is_minimal_profiling = 'E2E Time(Not minimal profiling)' not in self._headers
        if self._has_base_collection:
            self.cur_data["comparison_result"] = result_data
        time_category_dict = {}
        for time_category, time_list in self.performance_time_dict.items():
            time_value = self.get_time_value(time_category, self._comparison_data)
            if time_value == Constant.INVALID_VALUE:
                continue
            duration, _ = self.split_duration_and_num(time_value)
            # Drop the "(Wait Time)" style suffix for display.
            time_category = time_category.split("(")[0]
            time_category_dict[time_category] = duration
            self.get_sub_category_time(time_category, time_list, duration)
        self.cur_data["overall_data"] = time_category_dict

    def get_time_value(self, header_name: str, data_list: list):
        """Return the cell of ``data_list`` under ``header_name``, or ``Constant.INVALID_VALUE``."""
        try:
            data_index = self._headers.index(header_name)
        except ValueError:
            return Constant.INVALID_VALUE
        try:
            time_value = data_list[data_index]
        except IndexError:
            return Constant.INVALID_VALUE
        return time_value

    def get_sub_category_time(self, category: str, time_list: list, total_duration: float):
        """Store the break-down of ``category`` into its sub types in ``cur_data``."""
        sub_time_dict = {}
        for time_name in time_list:
            time_value = self.get_time_value(time_name, self._comparison_data)
            if time_value == Constant.INVALID_VALUE:
                continue
            sub_time_dict.setdefault(f"{category} Subtype", []).append(self.time_name_map.get(time_name, ""))
            duration, num = self.split_duration_and_num(time_value)
            sub_time_dict.setdefault(f"Duration(s)", []).append(duration)
            sub_time_dict.setdefault(f"Duration Ratio", []).append(
                "{:.2%}".format(self.calculate_ratio(duration, total_duration)))
            sub_time_dict.setdefault(f"Kernel Number", []).append(num)
        self.cur_data[self._short_name(category)] = sub_time_dict

    def identify_bottleneck(self):
        """Fill ``cur_bottleneck`` with the overall, per-category and comparison findings."""
        overall_data = self.cur_data.get("overall_data")
        if not overall_data:
            return
        # Keep the total numeric: it is used as a divisor below.
        e2e_time = sum(overall_data.values())
        overall_bottleneck = f"The Model E2E Time is {e2e_time:.3f}s.\n"
        comparison_bottleneck = ""
        for time_type, time_value in overall_data.items():
            # add subtype time bottleneck
            advice = self.advice_map.get(time_type, "")
            self.cur_bottleneck[self._short_name(time_type)] = f"{time_type} is {time_value}s.\n{advice}"
            # add overall bottleneck
            overall_bottleneck += f" -- {time_type} is {time_value}s\n"
            if time_type == "Free Time" and self._is_minimal_profiling \
                    and self.calculate_ratio(time_value, e2e_time) > 0.1:
                overall_bottleneck += "percentage of free time exceed the threshold 10%."
            if not self._has_base_collection:
                continue
            # add comparison bottleneck
            time_type_origin = "Uncovered Communication Time(Wait Time)" \
                if time_type == "Uncovered Communication Time" else time_type
            base_value = self.get_time_value(time_type_origin, self._base_data)
            if base_value == Constant.INVALID_VALUE:
                continue
            base_duration, _ = self.split_duration_and_num(base_value)
            if time_value > base_duration:
                ratio = "{:.2%}".format(self.calculate_ratio(time_value - base_duration, base_duration))
                comparison_bottleneck += f"{time_type} exceeds the benchmark by {ratio}\n"
        self.cur_bottleneck["overall_data"] = overall_bottleneck
        self.cur_bottleneck["comparison_result"] = comparison_bottleneck

    def optimize(self):
        """Run the whole analysis and return the ``OptimizeResult``."""
        if self.path_check():
            self.process()
            self.identify_bottleneck()
        # Always build the tables: make_record() reads them even when the
        # path check failed and nothing was analysed.
        self.format_bottleneck()
        self.format_cur_data()
        self.make_record()
        self.make_render()
        return self.result

    def format_bottleneck(self):
        """Turn ``cur_bottleneck`` into ``bottleneck_str`` and ``bottleneck_table``."""
        result = ''
        headers = []
        data = []
        for key, value in self.cur_bottleneck.items():
            result += f'{key}: {value} \n'
            headers.append(key)
            data.append(value)
        self.bottleneck_str = result
        self.bottleneck_table["headers"] = headers
        self.bottleneck_table["data"] = [data]

    def format_cur_data(self):
        """Turn every non-empty entry of ``cur_data`` into a headers/data table."""
        if not self.cur_data:
            return
        for data_type, data in self.cur_data.items():
            if not data:
                continue
            # A fresh table per data type; sharing one dict would make every
            # entry show the last data type.
            self.cur_data_table[data_type] = {
                "headers": list(data),
                "data": [[data[key] for key in data]],
            }

    def make_record(self):
        """
        make record for what and how to optimize
        """
        optimization_item = OptimizeItem(
            OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER,
            self.bottleneck_str,
            self.cur_advices
        )
        self.result.add(OptimizeRecord(optimization_item))

        self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0])
        for data_type, data in self.cur_data.items():
            if data:
                data_table = self.cur_data_table[data_type]
                self.result.add_detail(const.DATA + data_type, data_table["headers"], data_table["data"][0])

    def make_render(self):
        """Render the bottleneck summary into the html report."""
        result_for_html = {
            "Description": self.bottleneck_str,
            "suggestion": self.cur_advices,
            "details": [self.bottleneck_table]
        }

        self.html_render.render_template(key="cluster",
                                         title=OverallSummaryAnalyzer.OVERALL_SUMMARY_ANALYZER,
                                         template_dir="templates",
                                         template_name="cluster_analysis.html",
                                         cann_version=self.cann_version,
                                         torch_version=self.torch_version,
                                         result=result_for_html)

    def _short_name(self, time_type: str) -> str:
        """Return the short name of a time category.

        Accepts the header with or without its "(...)" suffix, because
        ``process()`` strips the suffix before storing the category; falls back
        to ``time_type`` itself so the result is never None.
        """
        short_name = self.time_name_map.get(time_type)
        if short_name is not None:
            return short_name
        for full_name, name in self.time_name_map.items():
            if full_name.split("(")[0] == time_type:
                return name
        return time_type
+from enum import Enum + + +class CsvTitle: + MODEL_NAME = "Model Name" + MODEL_ID = "Model ID" + TASK_ID = "Task ID" + STREAM_ID = "Stream ID" + INFER_ID = "Infer ID" + TASK_START_TIME = "Task Start Time(us)" + TASK_WAIT_TIME = "Task Wait Time(us)" + BLOCK_DIM = "Block Dim" + MIX_BLOCK_DIM = "Mix Block Dim" + HF32_ELIGIBLE = "HF32 Eligible" + INPUT_SHAPES = "Input Shapes" + INPUT_DATA_TYPES = "Input Data Types" + INPUT_FORMATS = "Input Formats" + OUTPUT_SHAPES = "Output Shapes" + OUTPUT_DATA_TYPES = "Output Data Types" + OUTPUT_FORMATS = "Output Formats" + CONTEXT_ID = "Context ID" + AICORE_TIME = "aicore_time(us)" + AIC_TOTAL_CYCLES = "aic_total_cycles" + AIC_MAC_TIME = "aic_mac_time(us)" + AIC_MAC_RATIO = "aic_mac_ratio" + AIC_SCALAR_TIME = "aic_scalar_time(us)" + AIC_SCALAR_RATIO = "aic_scalar_ratio" + AIC_MTE1_TIME = "aic_mte1_time(us)" + AIC_MTE1_RATIO = "aic_mte1_ratio" + AIC_MTE2_TIME = "aic_mte2_time(us)" + AIC_MTE2_RATIO = "aic_mte2_ratio" + AIC_FIXPIPE_TIME = "aic_fixpipe_time(us)" + AIC_FIXPIPE_RATIO = "aic_fixpipe_ratio" + AIC_ICACHE_MISS_RATE = "aic_icache_miss_rate" + AIV_TIME = "aiv_time(us)" + AIV_TOTAL_CYCLES = "aiv_total_cycles" + AIV_VEC_TIME = "aiv_vec_time(us)" + AIV_VEC_RATIO = "aiv_vec_ratio" + AIV_SCALAR_TIME = "aiv_scalar_time(us)" + AIV_SCALAR_RATIO = "aiv_scalar_ratio" + AIV_MTE2_TIME = "aiv_mte2_time(us)" + AIV_MTE2_RATIO = "aiv_mte2_ratio" + AIV_MTE3_TIME = "aiv_mte3_time(us)" + AIV_MTE3_RATIO = "aiv_mte3_ratio" + AIV_ICACHE_MISS_RATE = "aiv_icache_miss_rate" + CUBE_UTILIZATION = "cube_utilization( %)" + TASK_DURATION_SUM = "Task Duration Sum(us)" + TASK_DURATION_MEAN = "Task Duration Mean(us)" + TASK_DURATION_STD = "Task Duration Std(us)" + TASK_DURATION_RATIO = "Task Duration Ratio(100%)" + SIZE = "size(MB)" + THROUGHPUT = "throughput(GB/s)" + COLOR = "color" + GAP = "Gap(us)" + DURATION_SUM = "Duration Sum(us)" + COUNT = "Count" + MAX_DURATION = "Max Duration(us)" + MIN_DURATION = "Min Duration(us)" + AVG_DURATION = "Avg 
Duration(us)" + DURATION_RATIO = "Duration Ratio" + INDEX = "Index" + + +# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配旧版csv +class CsvTitleV1(CsvTitle): + OP_NAME = "Op Name" + OP_TYPE = "OP Type" + TASK_TYPE = "Task Type" + TASK_DURATION = "Task Duration(us)" + + +# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配新版csv +class CsvTitleV2(CsvTitle): + OP_NAME = "Name" + OP_TYPE = "Type" + TASK_TYPE = "Accelerator Core" + TASK_DURATION = "Duration(us)" + + +class Constant: + DTYPE_SIZE_MAP = {"int8": 1, "uint8": 1, + "int16": 2, "uint16": 2, + "int32": 4, "uint32": 4, + "int64": 8, "uint64": 8, + "float16": 2, + "bfloat16": 2, + "bf16": 2, + "dt_bf16": 2, + "float32": 4, + "float": 4, + "float64": 8, + "complex64": 8, + "complex128": 16, + "bool": 1} + TP_THRESHOLD = 1150 + MAX_INPUT_MODE_LEN = 30 + MAX_INPUT_ADVICE_LEN = 30 + SMALL_OP_DUR_RATIO = 0.2 + SMALL_OP_NUM_RATIO = 0.2 + BYTE_UNIT_TRANS = 1024 + UNIT_TRANS = 1000 + + # mode list + COMPUTE = "compute" + TIMELINE = "timeline" + CLUSTER = "cluster" + OVERALL = "overall" + PIPELINE = "pipeline" + + # advice list + SLOW_RANK = "slow rank" + SLOW_LINK = "slow link" + KERNEL = "kernel" + + # compute + NPU_FUSED = "npu_fused" + NPU_SLOW = "npu_slow" + + # timeline + OPTIM = "optimizer" + OP_SCHE = "op_schedule" + + # overall + SUMMARY = "summary" + + PT_PROF_SUFFIX = "ascend_pt" + ASCEND_PROFILER_OUTPUT = "ASCEND_PROFILER_OUTPUT" + COLLECTION_PATH = "collection_path" + CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" + KERNEL_DETAILS_CSV = "kernel_details.csv" + CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" + CLUSTER_COMM_JSON = "cluster_communication.json" + + # pipline + OP_NAME = "name" + OP_TID = "tid" + PID = "pid" + TS = "ts" + DUR = "dur" + CAT = "cat" + ARGS = "args" + PH = "ph" + ID = "id" + PH_START = "s" + PH_BEGIN = "B" + PH_END = "E" + PH_META = "M" + PH_X = "X" + CNAME = "cname" + PROCESS_NAME = "process_name" + FRAMEWORK_NAME = "Python" + ASCEND_HARDWARE_NAME = "Ascend Hardware" + ASYNC_NPU = "async_npu" + 
STEP_PREFIX = "ProfilerStep#" + FP_ATEN_OP = "aten" + FP_C10D_OP = "c10d" + HCOM_OP_PREFIX = "hcom_" + BP_AUTOGRAD_OP = "autograd" + TRACE_VIEW_JSON = "trace_view.json" + + # pattern_dict key: pattern, value: pattern name + PATTERN_DICT = {("Add", "DropOutDoMask", "Add"): "bias_dropout_add", + ("BatchMatMul", "Mul", "Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast", "DropOutDoMask", + "AsStrided", "BatchMatMul", "Transpose"): "FA", + ("Transpose", "Transpose", "Transpose", "Mul", "Transpose", "BatchMatMulV2", "MaskedFill", + "Cast", "SoftmaxV2", "Cast", "DropOutDoMask", "BatchMatMulV2", "Transpose"): "FA", + ("Transpose", "BatchMatMulV2", "Transpose", "Transpose", "BatchMatMulV2", "ZerosLike", + "DropOutDoMask", "Cast", "SoftmaxGrad", "Cast", "MaskedFill", "BatchMatMulV2", + "BatchMatMulV2", "Mul"): "FA", + ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Cast", "Cast", "Mul", "Cast", "Cast", + "Mul", "Cast"): "RMSNORM", + ("Cast", "LayerNorm", "Cast"): "LayerNorm", + ("Add", "LayerNorm"): "AddLayerNorm", + ("Add", "LayerNormV3"): "AddLayerNorm", + ("Gelu", "Add"): "GeluAdd", + ("Cast", "Square", "MemSet", "ReduceMean", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "RMSNorm", + ("BatchMatMul", "RealDiv", "Add", "Maximum", "SoftmaxV2", "Cast", "BatchMatMul"): "FA", + ("BatchMatMulV2", "RealDiv", "Add", "Cast", "Maximum", "Cast", "SoftmaxV2", "AsStrided", + "BatchMatMulV2"): "FA", + ("BatchMatMulV2", "RealDiv", "Add", "Cast", "SoftmaxV2", "Cast", "BroadcastTo", + "BatchMatMulV2"): "FA", + ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Cast", "Mul", "Add"): "RotaryMul", + ("Mul", "AsStrided", "Neg", "AsStrided", "ConcatD", "Mul", "Add"): "RotaryMul", + ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul", "Add"): "RotaryMul", + ("MatMulV2", "Swish", "MatMulV2", "Mul", "MatMulV2"): "FFN", + ("Transpose", "Transpose", "GatherElement", "Transpose"): "GatherElement", + ("Slice", "Slice", "Swish", "Mul"): "torch_npu.npu_swiglu", + ("Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast"): 
"torch_npu.npu_scaled_masked_softmax", + ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul"): "torch_npu.npu_rotary_mul", + ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "torch_npu.npu_rms_norm"} + TITLE = CsvTitleV2 + + @classmethod + def update_title(cls): + cls.TITLE = CsvTitleV1 + + +class CoreType: + AIV = "AI_VECTOR_CORE" + AIC = "AI_CORE" + AICPU = "AI_CPU" + MIX_AIV = "MIX_AIV" + MIX_AIC = "MIX_AIC" + HCCL = "HCCL" + + +class PerfColor(Enum): + WHITE = 0 + GREEN = 1 + YELLOW = 2 + RED = 3 + # timeline DEQUEUE = "Dequeue" DEQUEUE_SEP = "@" @@ -120,3 +346,6 @@ CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" KERNEL_DETAILS_CSV = "kernel_details.csv" CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" CLUSTER_COMM_JSON = "cluster_communication.json" + +BOTTLENECK = "bottleneck" +DATA = "data" \ No newline at end of file diff --git a/profiler/advisor/common/trace_view_json.py b/profiler/advisor/common/trace_view_json.py new file mode 100644 index 0000000000..8171f06ee2 --- /dev/null +++ b/profiler/advisor/common/trace_view_json.py @@ -0,0 +1,209 @@ +# Copyright (c) 2024, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +from abc import abstractmethod +from dataclasses import dataclass +from dataclasses import field +from typing import Dict +from typing import List + +import pandas as pd + +from common_func.file_manager import FileManager + + +@dataclass +class TraceObj: + ph: str = "" + bp: str = "" + cat: str = "" + name: str = "" + pid: int = 0 + tid: int = 0 + id: int = 0 + ts: str = "" + dur: float = 0.0 + args: dict = field(default='unknown') + + @abstractmethod + def hash(self): + raise Exception("To be implemented") + + def valid(self): + return self.name != "" + + def check_hashable(self): + if not self.valid(): + raise Exception("Illegal {} to hash".format(self.__class__.name)) + + +@dataclass +class Process(TraceObj): + def hash(self): + self.check_hashable() + # msprof 保证name唯一性 + return self.args.get("name") + + +@dataclass +class Thread(TraceObj): + def hash(self): + self.check_hashable() + # msprof 保证name唯一性 + return self.args.get("name") + + +@dataclass +class DurationEvent(TraceObj): + def hash(self): + self.check_hashable() + return self.ts + + +@dataclass +class FlowEvent(TraceObj): + s_point_ts: str = "" + e_point_ts: str = "" + + def hash(self): + self.check_hashable() + return self.e_point_ts + + +class TraceViewJson: + + def __init__(self, path): + self.processes: Dict[str, Process] = dict() + self.threads: Dict[str, Thread] = dict() + self.python_dur_events: Dict[str, DurationEvent] = dict() + self.cann_dur_events: Dict[str, DurationEvent] = dict() + self.ascend_hardware_dur_events: Dict[str, DurationEvent] = dict() + self.torch_2_npu_flow_events: Dict[str, FlowEvent] = dict() + traces = FileManager.read_json_file(path) + self._load_obj(traces) + + def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: + if ts_col not in data.columns.tolist(): + print("[ERROR] No {} col found in data columns.".format(ts_col)) + return "" + row = data.loc[index_id] + timestamp = row[ts_col] + flow_event = 
self.get_torch_2_npu_flow_event(timestamp) + if not flow_event.valid(): + print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) + return "" + flow_event_s_key = flow_event.s_point_ts + python_dur_events = self.get_python_dur_events_contain_ts(flow_event_s_key) + if not python_dur_events: + print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) + return "" + # 保持新老版本callstack兼容性 + if python_dur_events[0].args.get("Call stack"): + # 旧版本 + call_stack_list = python_dur_events[0].args.get("Call stack").split(";") + else: + python_dur_events.sort(key=lambda e: e.ts) + # 新版本 + call_stack_list = [event.name for event in python_dur_events if event.cat == "python_function"] + call_stack = "\n".join(call_stack_list) + return call_stack + + def get_torch_2_npu_flow_event(self, end_time) -> FlowEvent: + if not self.torch_2_npu_flow_events or not self.torch_2_npu_flow_events.get(end_time): + print("[ERROR] Find flow event failed for ts: {}".format(end_time)) + return FlowEvent() + return self.torch_2_npu_flow_events.get(end_time) + + def get_python_dur_events_contain_ts(self, ts) -> List[DurationEvent]: + res = [] + for event in self.python_dur_events.values(): + if float(event.ts) <= float(ts) <= float(event.ts) + event.dur: + res.append(event) + return res + + def _load_obj(self, traces): + self._load_format(traces) + if not self._check_format(): + print("[ERROR] parse json failed for error format") + return + self._load_duration_events(traces) + self._load_torch_to_npu_flow_events(traces) + + def _check_format(self): + # 当前功能只需要这两个process,可扩展 + check_processes = ['Python', 'Ascend Hardware'] + for check_process in check_processes: + if check_process in self.processes: + continue + print("[ERROR] {} process not found in json.".format(check_process)) + return False + return True + + # 加载pid, tid头 + def _load_format(self, traces: List[Dict]): + for i, trace in enumerate(traces): + if trace.get('name') == 'process_name': + if not 
trace.get('args') or not trace.get('args').get('name') or not trace.get('pid'): + continue + process = Process(**trace) + self.processes[process.hash()] = process + if trace.get('name') == 'thread_name': + if not trace.get('args') or not trace.get('args').get('name') or not trace.get('tid'): + continue + thread = Thread(**trace) + self.threads[thread.hash()] = thread + + def _load_duration_events(self, traces: List[Dict]): + def check_events(_trace): + return _trace.get('name') and _trace.get("ts") and _trace.get("dur") + + python_pid = self.processes.get("Python").pid + cann_pid = self.processes.get("CANN").pid + ascend_hardware_pid = self.processes.get("Ascend Hardware").pid + for i, trace in enumerate(traces): + if trace.get('ph') != 'X': + continue + if not check_events(trace): + continue + event = DurationEvent(**trace) + if trace.get('pid') == python_pid: + self.python_dur_events[event.hash()] = event + elif trace.get('pid') == cann_pid: + self.cann_dur_events[event.hash()] = event + elif trace.get("pid") == ascend_hardware_pid: + self.ascend_hardware_dur_events[event.hash()] = event + + def _load_torch_to_npu_flow_events(self, traces: List[Dict]): + def check_events(_trace): + return _trace.get('name') and _trace.get("id") and _trace.get("ts") + + flow_events_table_by_id = dict() + + python_pid = self.processes.get("Python") + for i, trace in enumerate(traces): + if trace.get('ph') != 's' and trace.get('ph') != 'f' and trace.get('pid') != python_pid: + continue + if not check_events(trace): + continue + event = flow_events_table_by_id.get(trace.get("id")) + if not event: + event = FlowEvent(**trace) + if trace.get('ph') == 's': + event.s_point_ts = trace.get('ts') + else: + event.e_point_ts = trace.get('ts') + flow_events_table_by_id[event.id] = event + + self.torch_2_npu_flow_events = {eve.hash(): eve for eve in flow_events_table_by_id.values()} diff --git a/profiler/advisor/common/trace_view_preprocessor.py 
b/profiler/advisor/common/trace_view_preprocessor.py new file mode 100644 index 0000000000..14a13066f6 --- /dev/null +++ b/profiler/advisor/common/trace_view_preprocessor.py @@ -0,0 +1,208 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys +from typing import Optional +from dataclasses import dataclass + +from profiler.advisor.common.constant import Constant + + +@dataclass +class FineTraceViewData: + py_pid: int = -1 + fp_tid: int = -1 + bp_tid: int = -1 + ascend_pid: int = -1 + min_ts: str = str(sys.maxsize) + max_ts: str = "0" + hcom_tids: list = None + fp_ops: list = None + bp_ops: list = None + hcom_ops: list = None + npu_ops_ts_dur: dict = None + torch_to_npu_links: list = None + + def __post_init__(self): + self.hcom_tids = self.hcom_tids or [] + self.fp_ops = self.fp_ops or [] + self.bp_ops = self.bp_ops or [] + self.hcom_ops = self.hcom_ops or [] + self.npu_ops_ts_dur = self.npu_ops_ts_dur or {} + self.torch_to_npu_links = self.torch_to_npu_links or [] + + def sort(self): + self.fp_ops.sort(key=lambda x: x[Constant.TS]) + self.bp_ops.sort(key=lambda x: x[Constant.TS]) + self.hcom_ops.sort(key=lambda x: x[Constant.TS]) + self.torch_to_npu_links.sort(key=lambda x: x[Constant.TS]) + + +class TraceViewPreProcessor: + """ + Trace view data preprocess + """ + + @staticmethod + def _is_fp_op(op_name: str) -> bool: + """ + check whether op is fp op + """ + return 
op_name.startswith(Constant.FP_ATEN_OP) or op_name.startswith(Constant.FP_C10D_OP) + + @staticmethod + def _is_fp_data(data: dict, fp_tid: int, py_pid: int) -> bool: + """ + check whether data is valid fp data + """ + return data[Constant.OP_TID] == fp_tid and \ + Constant.TS in data and Constant.DUR in data and \ + not data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) and \ + data[Constant.PID] == py_pid + + @staticmethod + def _is_bp_op(op_name: str) -> bool: + """ + check whether op is bp op + """ + return op_name.startswith(Constant.BP_AUTOGRAD_OP) + + @staticmethod + def _is_bp_data(data: dict, bp_tid: int, py_pid: int) -> bool: + """ + check whether data is valid bp data + """ + return data[Constant.OP_TID] == bp_tid and \ + Constant.TS in data and Constant.DUR in data and \ + data[Constant.PID] == py_pid + + @staticmethod + def _is_torch_to_npu_link(data: dict, fp_tid: int) -> bool: + """ + check whether data is torch to npu link + """ + return Constant.CAT in data and data[Constant.CAT] == Constant.ASYNC_NPU and \ + data[Constant.PH] == Constant.PH_START and \ + data[Constant.PID] == fp_tid + + @staticmethod + def _is_send_recv_op(op_name: str) -> bool: + """ + check whether op is hcom send or recv op + """ + # eg: hcom_BatchSendRecv__101_0_1 + p1 = re.compile(r'hcom_\w+SendRecv__\d+') + # eg: hcom_send__101_0_1 + p2 = re.compile(r'hcom_send__\d+') + # eg: hcom_receive__101_0_1 + p3 = re.compile(r'hcom_receive__\d+') + return bool(p1.match(op_name)) or bool(p2.match(op_name)) or bool(p3.match(op_name)) + + @staticmethod + def _is_hcom_op(op_name: str) -> bool: + """ + check whether data is hcom data + """ + return op_name.startswith(Constant.HCOM_OP_PREFIX) + + @staticmethod + def _is_python_process(data: dict) -> bool: + """ + check whether data is python process + """ + return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ + data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ + data[Constant.ARGS][Constant.OP_NAME] == 
Constant.FRAMEWORK_NAME + + @staticmethod + def _is_step_op(data: dict) -> bool: + """ + check whether data is step data + """ + return data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) + + @staticmethod + def _is_ascend_process(data: dict) -> bool: + """ + check whether data is ascend process data + """ + return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ + data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ + data[Constant.ARGS][Constant.OP_NAME] == Constant.ASCEND_HARDWARE_NAME + + @staticmethod + def _is_npu_op(data: dict, ascend_pid: int) -> bool: + """ + check whether data is npu op + """ + return Constant.PH in data and data[Constant.PH] == Constant.PH_X and \ + not data[Constant.OP_NAME].isupper() and \ + data[Constant.PID] == ascend_pid + + def process(self, raw_data: list) -> Optional[FineTraceViewData]: + """ + preprocess raw data + """ + if not raw_data: + print("[ERROR] No raw data found in trace view data.") + return None + + raw_fp_tids, raw_bp_tids, raw_hcom_tids = set(), set(), set() + fine_data = FineTraceViewData() + + # counting fp ops and bp ops tid and ascend pid + for data in raw_data: + if self._is_fp_op(data[Constant.OP_NAME]): + raw_fp_tids.add(data[Constant.OP_TID]) + elif self._is_bp_op(data[Constant.OP_NAME]): + raw_bp_tids.add(data[Constant.OP_TID]) + elif self._is_send_recv_op(data[Constant.OP_NAME]): + fine_data.hcom_ops.append(data) + raw_hcom_tids.add(data[Constant.OP_TID]) + elif self._is_python_process(data): + fine_data.py_pid = data[Constant.PID] + elif self._is_ascend_process(data): + fine_data.ascend_pid = data[Constant.PID] + + # find max and min ts in hcom ops + if self._is_hcom_op(data[Constant.OP_NAME]): + # for compatibility with old data (ts is float type) + ts = data[Constant.TS] if not isinstance(data[Constant.TS], float) else str(data[Constant.TS]) + fine_data.min_ts = min(fine_data.min_ts, ts) + fine_data.max_ts = max(fine_data.max_ts, ts) + + unique_fp_tid = list(raw_fp_tids - 
raw_bp_tids) + unique_bp_tid = list(raw_bp_tids) + fine_data.hcom_tids = list(raw_hcom_tids) + + if not unique_fp_tid or not unique_bp_tid: + print("[INFO] No fp or bp tid found in trace view data.") + else: + fine_data.fp_tid, fine_data.bp_tid = unique_fp_tid[0], unique_bp_tid[0] + + # filter fp ops and bp ops and torch_to_npu_links + for data in raw_data: + if self._is_fp_data(data, fine_data.fp_tid, fine_data.py_pid): + fine_data.fp_ops.append(data) + elif self._is_bp_data(data, fine_data.bp_tid, fine_data.py_pid): + fine_data.bp_ops.append(data) + elif self._is_torch_to_npu_link(data, fine_data.fp_tid): + fine_data.torch_to_npu_links.append(data) + elif self._is_npu_op(data, fine_data.ascend_pid): + fine_data.npu_ops_ts_dur[data[Constant.TS]] = data[Constant.DUR] + + fine_data.sort() + return fine_data diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 156922f4d1..af801b9ded 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -4,14 +4,15 @@ from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import Ti from profiler.advisor.utils.utils import Timer from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer - +from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer +from profiler.advisor.analyzer.computation.npu_fused.npu_slow_advice import NpuSlowAnalyzer class Interface: supported_analyzer = { "schedule": [TimelineFusionOpsAnalyzer], "computation": [], "communication": [], - "overall": [], + "overall": [OverallSummaryAnalyzer], "dataloader": [], "cluster": [SlowRankAnalyzer, SlowLinkAnalyzer] } diff --git a/profiler/cluster_analyse/cluster_analysis.py b/profiler/cluster_analyse/cluster_analysis.py index fd127fdc03..9ec33928aa 100644 --- a/profiler/cluster_analyse/cluster_analysis.py +++ 
b/profiler/cluster_analyse/cluster_analysis.py @@ -16,13 +16,13 @@ import argparse import os -from cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor -from cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor -from communication_group.communication_group_generator import CommunicationGroupGenerator -from common_func.constant import Constant -from common_func.file_manager import FileManager -from common_func.path_manager import PathManager -from analysis.analysis_facade import AnalysisFacade +from profiler.cluster_analyse.cluster_data_preprocess.pytorch_data_preprocessor import PytorchDataPreprocessor +from profiler.cluster_analyse.cluster_data_preprocess.mindspore_data_preprocessor import MindsporeDataPreprocessor +from profiler.cluster_analyse.communication_group.communication_group_generator import CommunicationGroupGenerator +from profiler.cluster_analyse.common_func.constant import Constant +from profiler.cluster_analyse.common_func.file_manager import FileManager +from profiler.cluster_analyse.common_func.path_manager import PathManager +from profiler.cluster_analyse.analysis.analysis_facade import AnalysisFacade class ClusterAnalysis: diff --git a/profiler/cluster_analyse/common_func/file_manager.py b/profiler/cluster_analyse/common_func/file_manager.py index 28ecbeaaf1..00c9fb1bbe 100644 --- a/profiler/cluster_analyse/common_func/file_manager.py +++ b/profiler/cluster_analyse/common_func/file_manager.py @@ -17,8 +17,8 @@ import os import csv import json -from common_func.constant import Constant -from common_func.path_manager import PathManager +from profiler.cluster_analyse.common_func.constant import Constant +from profiler.cluster_analyse.common_func.path_manager import PathManager class FileManager: diff --git a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py index 
8830d49599..894367d070 100644 --- a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py +++ b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py @@ -6,7 +6,7 @@ import csv import unittest from advisor_backend.interface import Interface -from advisor_backend.compute_advice.npu_slow_advice import NpuSlowAdvice +from advisor_backend.compute_advice.npu_slow_advice import class TestNpuSlowAdvice(unittest.TestCase): @@ -186,7 +186,7 @@ class TestNpuSlowAdvice(unittest.TestCase): self.create_kernel_details() interface = Interface(self.ASCEND_PT_DIR) data = interface.get_data('compute', 'npu_slow') - call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") + call_stack = (self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") self.assertEqual(9, len(data)) self.assertEqual("", call_stack) @@ -197,8 +197,8 @@ class TestNpuSlowAdvice(unittest.TestCase): interface = Interface(self.ASCEND_PT_DIR) data = interface.get_data('compute', 'npu_slow') slow_op_data = data[data["color"] == "RED"] - NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) - call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") + .save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) + call_stack = (self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") self.assertEqual(9, len(data)) self.assertEqual(2, len(slow_op_data)) print(call_stack) @@ -213,8 +213,8 @@ class TestNpuSlowAdvice(unittest.TestCase): interface = Interface(self.ASCEND_PT_DIR) data = interface.get_data('compute', 'npu_slow') slow_op_data = data[data["color"] == "RED"] - NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) - call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") + 
.save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) + call_stack = (self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") self.assertEqual(9, len(data)) self.assertEqual(2, len(slow_op_data)) print(call_stack) -- Gitee From 0eda53956df2f3d2aef3a19b30df56289911454b Mon Sep 17 00:00:00 2001 From: shpity Date: Thu, 16 May 2024 18:28:20 +0800 Subject: [PATCH 13/21] add graph op fusion strategy --- profiler/advisor/analyzer/base_analyzer.py | 2 +- .../computation/aicpu/aicpu_checker.py | 4 +- .../computation/bound/block_dim_checker.py | 2 +- .../bound/operator_bound_checker.py | 2 +- .../op_compile/dynamic_shape_checker.py | 2 +- .../advisor/analyzer/graph_fusion/__init__.py | 0 .../graph_fusion/graph_fusion_analyzer.py | 47 ++ .../graph_fusion/graph_fusion_checker.py | 207 ++++++++ profiler/advisor/common/analyzer_scopes.py | 1 + profiler/advisor/common/graph/__init__.py | 0 profiler/advisor/common/graph/graph.py | 135 +++++ profiler/advisor/common/graph/graph_match.py | 355 +++++++++++++ profiler/advisor/common/graph/graph_parser.py | 413 +++++++++++++++ profiler/advisor/dataset/__init__.py | 6 - profiler/advisor/dataset/graph_dataset.py | 53 ++ .../display/html/templates/fusion.html | 47 ++ profiler/advisor/interface/interface.py | 8 +- profiler/advisor/rules/op_fusion_pass.yaml | 491 ++++++++++++++++++ profiler/cli/__init__.py | 2 +- profiler/cli/entrance.py | 2 +- .../cluster_analyse/common_func/db_manager.py | 3 + profiler/test/tools/tool.py | 2 +- 22 files changed, 1766 insertions(+), 18 deletions(-) create mode 100644 profiler/advisor/analyzer/graph_fusion/__init__.py create mode 100644 profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py create mode 100644 profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py create mode 100644 profiler/advisor/common/graph/__init__.py create mode 100644 profiler/advisor/common/graph/graph.py create mode 100644 profiler/advisor/common/graph/graph_match.py 
create mode 100644 profiler/advisor/common/graph/graph_parser.py create mode 100644 profiler/advisor/dataset/graph_dataset.py create mode 100644 profiler/advisor/display/html/templates/fusion.html create mode 100644 profiler/advisor/rules/op_fusion_pass.yaml diff --git a/profiler/advisor/analyzer/base_analyzer.py b/profiler/advisor/analyzer/base_analyzer.py index 160f05c464..e2f3abc537 100644 --- a/profiler/advisor/analyzer/base_analyzer.py +++ b/profiler/advisor/analyzer/base_analyzer.py @@ -80,7 +80,7 @@ class BaseAnalyzer(VersionControl, metaclass=ABCMeta): key = dataset_cls.get_key() if key not in self.dataset_list: self.dataset_list[key] = [] - self.dataset_list[key].append(dataset) + self.dataset_list[key].append(dataset) @staticmethod def get_first_data_by_key(data, key) -> Union[Dataset, None]: diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py index 4654d97225..052711f29b 100644 --- a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -42,7 +42,7 @@ class AicpuChecker(OperatorChecker): def load_aicpu_rules(self, rule_path="rules/aicpu_rules.yaml") -> Dict: if not os.path.isabs(rule_path): rule_path = os.path.join(os.path.dirname(__file__), - "../../computation/", "../", rule_path) + "../../../", rule_path) if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) @@ -146,7 +146,7 @@ class AicpuChecker(OperatorChecker): return True def make_render(self, html_render, record): - html_render.render_template(key="operator", + html_render.render_template(key="computation", template_dir="templates", template_name="operator_ai_cpu.html", format_result=self.format_operator_result(record, constant.OPERATOR_LIST_UNLIMIT)) diff --git a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py 
b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py index d1a1384b8d..d90ef56c78 100644 --- a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py +++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py @@ -43,7 +43,7 @@ class BlockDimChecker(OperatorChecker): return True def make_render(self, html_render, record): - html_render.render_template(key="operator", + html_render.render_template(key="computation", template_dir="templates", template_name="operator_block_dim.html", format_result=self.format_operator_result(record, constant.OPERATOR_OUT_TOPK)) diff --git a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py index d919eb7d5f..4ede3c94e6 100644 --- a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py +++ b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py @@ -44,7 +44,7 @@ class OperatorBoundChecker(OperatorChecker): return True def make_render(self, html_render, record): - html_render.render_template(key="operator", + html_render.render_template(key="computation", template_dir="templates", template_name="operator_no_bound.html", format_result=self.format_operator_result(record, constant.OPERATOR_OUT_TOPK)) diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py index 4d405eb918..746cc71606 100644 --- a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py +++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py @@ -76,7 +76,7 @@ class DynamicShapeChecker(OperatorChecker): return format_result def make_render(self, html_render, record): - html_render.render_template(key="operator", + html_render.render_template(key="computation", template_dir="templates", template_name="operator_dynamic_shape.html", 
format_result=self.format_operator_result(record)) diff --git a/profiler/advisor/analyzer/graph_fusion/__init__.py b/profiler/advisor/analyzer/graph_fusion/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py new file mode 100644 index 0000000000..059950089d --- /dev/null +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py @@ -0,0 +1,47 @@ +from typing import List +from functools import partial + +from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.dataset.graph_dataset import GraphDataset +from profiler.advisor.analyzer.graph_fusion.graph_fusion_checker import GraphFusionRules +from profiler.advisor.dataset.profiling.profiling_dataset import ProfilingDataset +from profiler.advisor.display.html.render import HTMLRender + + +class FusionOPAnalyzer(BaseAnalyzer): + """ + fusion optimizer + """ + RULES = dict(graph_dataset=partial(GraphFusionRules, "rules/op_fusion_pass.yaml")) + dataset_cls_list = [GraphDataset, ProfilingDataset] + + def __init__(self, collection_path, **kwargs) -> None: + super(FusionOPAnalyzer, self).__init__(collection_path, **kwargs) + self.result = OptimizeResult() + self.html_render = HTMLRender() + + @BaseAnalyzer.check_data((GraphDataset.get_key(),)) + def optimize(self): + """ + :return: result + """ + self._check(self.dataset_list.get("GraphDataset"), self.dataset_list.get("ProfilingDataset")) + return self.result + + def _check(self, graph_data: List[GraphDataset], + profiling_data: List[ProfilingDataset] = None) -> None: + for _, rule in self.RULES.items(): + checker = rule() + if profiling_data is None: + checker.find_fusion_matched_issues(graph_data) + else: + checker.find_fusion_matched_issues_with_times(graph_data, profiling_data) + checker.make_record(self.result) + 
checker.make_render(self.html_render) + + def make_record(self): + pass + + def make_render(self): + pass diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py new file mode 100644 index 0000000000..e64020fdfe --- /dev/null +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_checker.py @@ -0,0 +1,207 @@ +import logging +from typing import List + +from tqdm import tqdm + +from profiler.advisor.result.result import OptimizeResult +from profiler.advisor.result.item import OptimizeItem, OptimizeRecord, StatisticsItem +from profiler.advisor.common.graph.graph import Graph +from profiler.advisor.common.graph.graph_parser import QueryGraphParser +from profiler.advisor.dataset.graph_dataset import GraphDataset +from profiler.advisor.common.graph.graph_match import find_isomorphisms + +logger = logging.getLogger() + + +class GraphFusionRules: + def __init__(self, fusion_rules: str): + self.fusion_rules = fusion_rules + self.candidates = [] + self.task_duration_list = [] + + @staticmethod + def build_query_graph(query_graphs) -> List[Graph]: + for _, query_graph in query_graphs.fusion_rules.items(): + for sub_graph in query_graph: + graph = Graph(*sub_graph) + graph.build() + yield graph + + def find_fusion_matched_issues(self, graphs: List[GraphDataset]): + query_graphs = QueryGraphParser(self.fusion_rules) + with tqdm(total=query_graphs.num_rules, leave=False, ncols=100, unit=" rules") as pbar: + pbar.set_description(f"Searching Isomorphic Subgraph") + for query_graph in self.build_query_graph(query_graphs): + query_candidates = find_isomorphisms(query_graph.graph, graphs[0].graphs[-1].graph) + pbar.update(1) + if len(query_candidates) > 0: + self.candidates.append(query_candidates) + + def find_fusion_matched_issues_with_times(self, graphs: List[GraphDataset], profiling): + self.find_fusion_matched_issues(graphs) + if len(self.candidates) == 0 or len(profiling) == 0: + return + + 
if not hasattr(profiling[0], 'op_summary') or profiling[0].op_summary is None: + if hasattr(profiling[0], 'msprof'): + self.match_time_from_msprof(profiling[0].msprof) + return + else: + logger.warning("Skip analyze operator because of not containing op summary.") + return + + self.match_time_from_summary(profiling[0].op_summary) + time_duration_sum = [] + for task_duration in self.task_duration_list: + time_duration_sum.append(sum([sum(duration) for duration in task_duration])) + time_duration_index = sorted(range(len(time_duration_sum)), + key=time_duration_sum.__getitem__, + reverse=True) + self.task_duration_list = [self.task_duration_list[i] for i in time_duration_index] + self.candidates = [self.candidates[i] for i in time_duration_index] + + def match_time_from_summary(self, op_summary): + op_dict = op_summary.task_dict + for candidates in self.candidates: + candidate_duration = [] + for candidate in candidates: + duration_list = [] + for node in candidate.values(): + if node.op_name not in op_dict or op_dict[node.op_name][0].op_type.lower() != node.op_type.lower(): + logger.warning("Operator %s is missing in op summary, which will be set to 0.", node.op_name) + duration_list.append(0.0) + continue + duration_list.append(float(op_dict[node.op_name][0].task_duration)) + candidate_duration.append(duration_list) + self.task_duration_list.append(candidate_duration) + + def match_time_from_msprof(self, msprof): + op_dict = dict() + for task in msprof.tasks: + if "item_id" not in task.args: + continue + op_dict[task.args["item_id"]] = {"task_duration": task.dur} + for candidates in self.candidates: + candidate_duration = [] + for candidate in candidates: + duration_list = [] + for node in candidate.values(): + if node.op_name not in op_dict: + logger.warning("Operator %s is missing in msprof, which will be set to 0.", node.op_name) + duration_list.append(0.0) + continue + duration_list.append(float(op_dict[node.op_name].get("task_duration"))) + 
candidate_duration.append(duration_list) + self.task_duration_list.append(candidate_duration) + + def make_render(self, html_render): + if not self.candidates: + return + + candidates_list = [] + for case_id, nodes in enumerate(self.candidates): + candidate_dict = dict() + candidate_dict['counts'] = len(nodes) + candidate_dict['matches'] = [] + has_time_info = False + if self.task_duration_list: + has_time_info = True + candidate_dict['total_duration'] = round(sum(sum(duration) for duration in + self.task_duration_list[case_id]), 2) + for node_index, refer_node in enumerate(nodes): + match = [] + index = 0 + pass_name = ','.join(item.op_type for item in refer_node.keys()) + for query_node, host_node in refer_node.items(): + fusion_pattern = query_node.op_pass + + if 'op_pass' not in candidate_dict: + candidate_dict['op_pass'] = fusion_pattern + if 'fusion_pattern' not in candidate_dict: + candidate_dict['fusion_pattern'] = pass_name + match_attr = dict() + match_attr['op_name'] = host_node.op_name + match_attr['dtype'] = query_node.op_type + if has_time_info: + match_attr['duration'] = round(self.task_duration_list[case_id][node_index][index], 2) + index += 1 + match.append(match_attr) + match_attr = dict() + match_attr['op_name'] = "-" + match_attr['dtype'] = "-" + if has_time_info: + match_attr['duration'] = round(sum(self.task_duration_list[case_id][node_index]), 2) + match.append(match_attr) + candidate_dict['matches'].append(match) + candidates_list.append(candidate_dict) + html_render.render_template(key="computation", + template_dir="templates", + template_name="fusion.html", + candidates=candidates_list) + + def make_record(self, result: OptimizeResult): + """ + make record for what and how to optimize + """ + if not self.candidates: + return + + optimization_item = OptimizeItem( + "fusion issue", + f"Found {len(self.candidates)} fusion issues", + ["Check fusion issues detail in att_advisor*.html"] + ) + total_time = 0.0 + for candidate in 
self.task_duration_list: + for duration in candidate: + total_time += sum(duration) + statistics_item = StatisticsItem(0, + total_time, + sum([len(candidate) for candidate in self.candidates]) + ) + result.add(OptimizeRecord(optimization_item, statistics_item)) + + record_title = [ + "issue_id", "graph_name", "op_name", "fusion_structure", "fusion_pattern", + "op_type", "input_shape", "input_format", + "input_dtype", "output_shape", "output_format", "output_dtype" + ] + result.add_detail('fusion issues', headers=record_title) + + for case_id, nodes in enumerate(self.candidates): + for _, refer_node in enumerate(nodes): + pass_name = ','.join(item.op_type for item in refer_node.keys()) + for query_node, host_node in refer_node.items(): + fusion_pattern = query_node.op_pass + detail = [ + case_id, + host_node.graph_name, + host_node.op_name, + pass_name, + fusion_pattern, + query_node.op_type, + self.get_attr_shape(host_node, "input", "shape"), + self.get_attr_type(host_node, "input", "format"), + self.get_attr_type(host_node, "input", "dtype"), + self.get_attr_shape(host_node, "output", "shape"), + self.get_attr_type(host_node, "output", "format"), + self.get_attr_type(host_node, "output", "dtype"), + ] + result.add_detail('fusion issues', detail=detail) + + @staticmethod + def get_attr_shape(node, type_name: str, attr_name: str) -> str: + attr_shape = [] + node_attrs = getattr(node, type_name, []) + for attrs in node_attrs: + attr = getattr(attrs, attr_name, []) + attr_shape.append(",".join(attr)) + return ";".join(attr_shape) + + @staticmethod + def get_attr_type(node, type_name: str, attr_name: str) -> str: + attr_type = [] + node_attrs = getattr(node, type_name, []) + for attr in node_attrs: + attr_type.append(getattr(attr, attr_name, "")) + return ";".join(attr_type) diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 03d7759a72..0c6a2ac260 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ 
b/profiler/advisor/common/analyzer_scopes.py @@ -3,6 +3,7 @@ class SupportedScopes: # used for specify fourth-level commands and define the key of the result dict # the key defined bellow must be the same as value TIMELINE_FUSION_OPS = "timeline_fusion_ops" + GRAPH = "graph" SLOW_RANK = "slow_rank" SLOW_LINK = "slow_link" PORFILING_OPERATOR_ANALYSIS = "profiling_operator_analysis" diff --git a/profiler/advisor/common/graph/__init__.py b/profiler/advisor/common/graph/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/advisor/common/graph/graph.py b/profiler/advisor/common/graph/graph.py new file mode 100644 index 0000000000..6bab2042de --- /dev/null +++ b/profiler/advisor/common/graph/graph.py @@ -0,0 +1,135 @@ +import logging +from typing import Dict, List, Tuple, Callable, Any, Optional, Union + +import networkx as nx + +from profiler.advisor.common.graph.graph_parser import HostGraphNode, QueryGraphNode + +logger = logging.getLogger() + + +class Graph: + """ + Graph Struct + """ + + # pylint: disable=too-many-instance-attributes + def __init__(self, + nodes: Dict[str, Optional[Union[HostGraphNode, QueryGraphNode]]] = None, + edges: List[Tuple[Optional[Union[HostGraphNode, QueryGraphNode]], + Optional[Union[HostGraphNode, QueryGraphNode]]]] = None, + name: str = None): + self.name = name + self.graph = nx.DiGraph(name=name) + self.nodes = nodes if nodes is not None else {} + self.edges = edges if edges is not None else list() + + def build(self): + for op_name, node in self.nodes.items(): + # add node and mark op_name as tag + self.add_node(node, + op_type=node.op_type + ) + for edge in self.edges: + self.add_edge(*edge) + return self.graph + + def get_size(self) -> Dict[str, int]: + if not hasattr(self.graph, "nodes"): + return {"edges": 0, "nodes": 0} + + return {"edges": len(self.graph.edges), + "nodes": len(self.graph.nodes)} + + def add_node(self, node: HostGraphNode, **kwargs): + if node is None: + return + 
self.graph.add_node(node, **kwargs) + + def add_edge(self, pre_node: HostGraphNode, next_node: HostGraphNode): + if pre_node is None or next_node is None: + return + + if pre_node not in self.graph or \ + next_node not in self.graph: + logger.error("Nodes between edge should be both exists.") + return + + self.graph.add_edge(pre_node, next_node) + + def add_node_with_edge(self, node, adj_nodes: List[HostGraphNode]): + self.add_node(node) + for adj in adj_nodes: + self.add_edge(node, adj) + + def remove_node(self, node: HostGraphNode = None) -> None: + if node is None: + return + + self.graph.remove_node(node) + + def remove_edge(self, pre_node: HostGraphNode = None, next_node: HostGraphNode = None) -> None: + if pre_node is None or next_node is None: + raise ValueError(f"Invalid edge from {pre_node} to {next_node}.") + + self.graph.remove_edge(pre_node, next_node) # delegate to the networkx graph; calling self.remove_edge here recursed without end + + def get_subgraph(self, nodes: List[HostGraphNode]) -> nx.DiGraph: + nodes = list(set(nodes)) + for node in nodes: + if not self.is_node_exists(node): + raise ValueError(f"Failed to subtract subgraph because {node.op_name} is not in the graph.") + + return self.graph.subgraph(nodes) + + def highlight_subgraph(self, subgraph: nx.DiGraph = None) -> None: + pass + + def get_node(self, node: HostGraphNode): + if node not in self.graph: + return + + return self.graph[node] + + def get_node_by_name(self, node_name: str): + return self.nodes.get(node_name, None) + + def is_node_exists(self, node: HostGraphNode): + return node in self.graph + + def draw(self, + graph: nx.DiGraph = None, + with_labels: bool = False, + labels: Dict[HostGraphNode, Any] = None, + pos_func: Callable = None, + font_weight: str = "bold", + savefig: bool = False, + node_size: int = 50, + **kwargs + ): + try: + import matplotlib.pylab as plt + except ImportError: + logger.error('Please install matplotlib first by using `pip install matplotlib`.') + return + + if graph is None: + graph = self.graph + + pos = pos_func(graph) if pos_func is not
None else None + + if with_labels: + if labels is None: + labels = {k: f"{k}\n({v['op_name']})" for k, v in graph.nodes.items()} + + nx.draw(graph, + with_labels=with_labels, + pos=pos, + node_size=node_size, + font_weight=font_weight, + labels=labels, + **kwargs + ) + if savefig: + plt.savefig(self.name + ".png") + plt.show() diff --git a/profiler/advisor/common/graph/graph_match.py b/profiler/advisor/common/graph/graph_match.py new file mode 100644 index 0000000000..d0dfc16295 --- /dev/null +++ b/profiler/advisor/common/graph/graph_match.py @@ -0,0 +1,355 @@ +import itertools +import logging +from functools import lru_cache +from collections import deque +from typing import Dict, Generator, List, Callable, Hashable, Tuple + +import networkx as nx + + +@lru_cache() +def match_node_attr_fun(query_node: Hashable, + host_node: Hashable, + query_graph: nx.Graph, + host_graph: nx.Graph + ) -> bool: + """ + Check query node matches the attributes in host graph + + :param query_node: Query graph node + :param host_node: Host graph node + :param query_graph: Query Graph + :param host_graph: Host graph + :return: bool, match or not + """ + # get node attr + if query_node not in query_graph.nodes or host_node not in host_graph.nodes: + return False + + query_node = query_graph.nodes[query_node] + host_node = host_graph.nodes[host_node] + for attr, val in query_node.items(): + if attr not in host_node: + return False + if isinstance(host_node[attr], str) and isinstance(val, str): + if host_node[attr].lower() != val.lower(): + return False + else: + if host_node[attr] != val: + return False + return True + + +@lru_cache() +def match_node_struct_fun(query_node: Hashable, + host_node: Hashable, + query_graph: nx.Graph, + host_graph: nx.Graph + ) -> bool: + """ + Check query node matches the structure in host graph + + :param query_node: Query graph node + :param host_node: Host graph node + :param query_graph: Query Graph + :param host_graph: Host graph + :return: bool, match 
or not + """ + if query_node not in query_graph.nodes or host_node not in host_graph.nodes: + return False + + return host_graph.degree(host_node) >= query_graph.degree(query_node) + + +@lru_cache() +def match_edge_attr_fun(query_edge: Tuple[Hashable, Hashable], + host_edge: Tuple[Hashable, Hashable], + query_graph: nx.Graph, + host_graph: nx.Graph + ) -> bool: + """ + Check query edge matches the attr in host graph + + :param query_edge: Query graph edge + :param host_edge: Host graph edge + :param query_graph: Query Graph + :param host_graph: Host graph + :return: bool, match or not + """ + # get edge attr + if query_edge not in query_graph.edges or host_edge not in host_graph.edges: + return False + + query_edge = query_graph.edges[query_edge] + host_edge = host_graph.edges[host_edge] + for attr, val in query_edge.items(): + if attr not in host_edge: + return False + if isinstance(host_edge[attr], str) and isinstance(val, str): + if host_edge[attr].lower() != val.lower(): + return False + else: + if host_edge[attr] != val: + return False + return True + + +def find_isomorphisms(query_graph: nx.Graph, + host_graph: nx.Graph, + *args, + _node_attr_fun: Callable = match_node_attr_fun, + _node_struct_fun: Callable = match_node_struct_fun, + _edge_attr_fun: Callable = match_edge_attr_fun, + limit: int = None, + **kwargs) -> List[Dict[Hashable, Hashable]]: + """ + Find all the sub graphs that are isomorphic to query_graph in host_graph . + + :param query_graph: The graph object to query + :param host_graph: The graph object to be queried + :param args: Position args + :param _node_attr_fun: The function to match node attr + :param _node_struct_fun: The function to match node structural + :param _edge_attr_fun: The function to match edge attr + :param limit: The limitation for the number of returned mappings + :param kwargs: Keyword args + :return: Matched node mapping list + ``` + [{query_id: host_id, ...}, ...] 
+ ``` + """ + candidates = [] + for query_result in find_isomorphisms_iter( + query_graph, + host_graph, + *args, + _node_attr_fun=_node_attr_fun, + _node_struct_fun=_node_struct_fun, + _edge_attr_fun=_edge_attr_fun, + **kwargs + ): + candidates.append(query_result) + if limit and len(candidates) >= limit: + return candidates + return candidates + + +def find_isomorphisms_iter(query_graph: nx.Graph, + host_graph: nx.Graph, + directed: bool = None, + _node_attr_fun: Callable = None, + _node_struct_fun: Callable = None, + _edge_attr_fun: Callable = None, + ) -> Generator[Dict[Hashable, Hashable], None, None]: + """ + A generation to find one isomorphic subgraph in host_graph for query_graph. + + :param query_graph: The graph object to query + :param host_graph: The graph object to be queried + :param directed: Whether direction should be considered during search + :param _node_attr_fun: The function to match node attr + :param _node_struct_fun: The function to match node structural + :param _edge_attr_fun: The function to match edge attr + :return: Yield mappings from query node IDs to host graph IDs: {query_id: host_id, ...} + + """ + if directed is None: + # query graph and host graph should consider directions. 
+ if isinstance(query_graph, nx.DiGraph) and \ + isinstance(host_graph, nx.DiGraph): + directed = True + else: + directed = False + + # Initialize queue + dq = deque() + dq.appendleft({}) + + while len(dq) > 0: + backbone = dq.pop() + next_candidate_backbones = get_next_candidates(backbone=backbone, + query_graph=query_graph, + host_graph=host_graph, + directed=directed, + _node_attr_fun=_node_attr_fun, + _node_struct_fun=_node_struct_fun, + _edge_attr_fun=_edge_attr_fun, + ) + for candidate in next_candidate_backbones: + # find a legal isomorphism + if len(candidate) == len(query_graph): + yield candidate + else: + # continue to search + dq.appendleft(candidate) + + +def get_next_candidates( + backbone: Dict, + query_graph: nx.Graph, # noqa + host_graph: nx.Graph, # noqa + next_node: Hashable = None, + directed: bool = True, # noqa + _node_attr_fun: Callable = None, # noqa + _node_struct_fun: Callable = None, # noqa + _edge_attr_fun: Callable = None # noqa +) -> List[Dict[Hashable, Hashable]]: + """ + Get a list of candidate node assignments for the next "step" of this map. 
+ + :param backbone: Mapping of query node IDs to one set of host graph IDs + :param next_node: Optional suggestion for the next node to assign + :return: List[Dict[Hashable, Hashable]]: A new list of node mappings with one additional element mapped + """ + node_priority = {n: 1 for n in query_graph.nodes} + candidate_nodes = [] + + if next_node is None and len(backbone) == 0: + # Start case + next_node = max(node_priority.keys(), + key=lambda x: node_priority.get(x, 0)) + + for node in host_graph.nodes: + if _node_attr_fun(next_node, node, query_graph, host_graph) and \ + _node_struct_fun(next_node, node, query_graph, host_graph): + candidate_nodes.append({next_node: node}) + return candidate_nodes + + nodes_with_maximum_backbone = [] + for query_node_id in query_graph.nodes: + if query_node_id in backbone: + continue + + backbone_neighbors = [] + if not directed: + backbone_neighbors = query_graph.adj[query_node_id] + else: + # nx.DiGraph.pred: A <- B: find previous node from B to A + # nx.DiGraph.adj: A -> B : find next node from A to B + backbone_neighbors = list(set(query_graph.adj[query_node_id]).union(set(query_graph.pred[query_node_id]))) + + query_backbone_node_count = sum([1 for _node in backbone_neighbors if _node in backbone]) + if query_backbone_node_count > 0: + # Find a longer backbone node + nodes_with_maximum_backbone.append(query_node_id) + + # next_node is connected to the current backbone. 
next_node = max(nodes_with_maximum_backbone, key=lambda x: node_priority.get(x, 0)) + + # verify all edges between `next_node` and nodes in the backbone exist in host graph + # Step1: find all edges between `next_node` and nodes in the backbone + next_edge_edges = [] + for _node in query_graph.adj[next_node]: + if _node in backbone: + # `next_node` -> `_node` + next_edge_edges.append((None, next_node, _node)) + + if directed: + for _node in query_graph.pred[next_node]: + if _node in backbone: + # `_node` -> `next_node` + next_edge_edges.append((_node, next_node, None)) + + if len(next_edge_edges) == 0: + logging.warning("Find node without any edge, which is invalid.") + return [] + # Step2: verify candidate nodes that have such edges in the host graph + candidate_nodes = [] + if len(next_edge_edges) == 1: + source, _, target = next_edge_edges[0] + if not directed: + candidate_nodes = list(host_graph.adj[backbone[target]]) + else: + if source is not None: + # means `source` is a `from` edge + candidate_nodes = list(host_graph.adj[backbone[source]]) + elif target is not None: + # means `target` is a `from` edge + candidate_nodes = list(host_graph.pred[backbone[target]]) + + elif len(next_edge_edges) > 1: + candidate_nodes_set = set() + for (source, _, target) in next_edge_edges: # iterate the required edges; candidate_nodes is still empty at this point + if not directed: + candidate_nodes_from_this_edge = host_graph.adj[backbone[target]] + else: + if source is not None: + candidate_nodes_from_this_edge = host_graph.adj[backbone[source]] + else: # target is not None: + candidate_nodes_from_this_edge = host_graph.pred[backbone[target]] + + if len(candidate_nodes_set) > 0: + candidate_nodes_set = candidate_nodes_set.intersection(candidate_nodes_from_this_edge) + else: + # Initialize candidate_nodes_set + candidate_nodes_set.update(candidate_nodes_from_this_edge) + candidate_nodes = list(candidate_nodes_set) + + tentative_results = [] + for _node in candidate_nodes: + if all([_node not in backbone.values(), + _node_attr_fun(next_node,
_node, query_graph, host_graph), + _node_struct_fun(next_node, _node, query_graph, host_graph)] + ): + tentative_results.append({**backbone, + next_node: _node}) + + final_candidates = check_edges_mapping(tentative_results, + query_graph=query_graph, + host_graph=host_graph, + _edge_attr_fun=_edge_attr_fun) + return final_candidates + + +def check_edges_mapping(candidates: List[Dict[Hashable, Hashable]], + query_graph: nx.Graph, + host_graph: nx.Graph, + _edge_attr_fun: Callable = None + ) -> List[Dict[Hashable, Hashable]]: + """ + Check that all edges between the assigned nodes exist in the host graph. + + :param candidates: mapping nodes candidates + :param query_graph: The graph object to query + :param host_graph: The graph object to be queried + :param _edge_attr_fun: The function to match edge attr + :return: + """ + monomorphism_candidates = [] + + for candidate in candidates: + if len(candidate) != len(query_graph): + monomorphism_candidates.append(candidate) + continue + + all_pass_flag = True + for edge_start, edge_end in query_graph.edges: + # check edge in host graph + if not host_graph.has_edge(candidate[edge_start], candidate[edge_end]): + all_pass_flag = False + break + + # check edge attr + if _edge_attr_fun is None or not _edge_attr_fun( + (edge_start, edge_end), + (candidate[edge_start], candidate[edge_end]), + query_graph, + host_graph + ): + all_pass_flag = False + break + + if all_pass_flag: + monomorphism_candidates.append(candidate) + + # Isomorphisms check + final_candidates = [] + for candidate in monomorphism_candidates: + all_product = itertools.product(candidate.keys(), candidate.keys()) + for edge_start, edge_end in all_product: + if not query_graph.has_edge(edge_start, edge_end) and \ + host_graph.has_edge(candidate[edge_start], candidate[edge_end]): + break + else: + final_candidates.append(candidate) + return final_candidates diff --git a/profiler/advisor/common/graph/graph_parser.py b/profiler/advisor/common/graph/graph_parser.py 
new file mode 100644 index 0000000000..d4c67fc191 --- /dev/null +++ b/profiler/advisor/common/graph/graph_parser.py @@ -0,0 +1,413 @@ +import os +import logging +import yaml +import itertools +from collections import deque +from dataclasses import dataclass +from typing import List, Tuple, Dict + +logger = logging.getLogger() + + +@dataclass +class Tensor: + def __init__(self): + super().__init__() + self.shape = [] + self.origin_shape = [] + self.shape_range = [] + self.origin_shape_range = [] + self.dtype = "" + self.origin_data_type = "" + self.format = "" + self.origin_format = [] + + +@dataclass +class Attr: + + def __init__(self): + super().__init__() + self.key = str() + self.value = [] + + +class HostGraphNode: + def __init__(self): + super().__init__() + self.graph_name = str() + self.op_name = str() + self.op_type = str() + self.inputs = [] + self.input = [] + self.outputs = [] + self.output = [] + self.strides = [] + self.pads = [] + self.groups = "" + self.dilations = [] + self.kernelname = "" + self._attrs = [] + + def __repr__(self): + return f"" + + +@dataclass +class HostGraph: + def __init__(self): + super().__init__() + self.name = "" + self.nodes = {} + self.inputs = [] + self.edges = [] + self.model_name = None + self.file_path = None + + def build(self): + """build a graph""" + for name, node in self.nodes.items(): + for input_node in node.inputs: + if input_node not in self.nodes: + continue + self.nodes[input_node].outputs.append(name) + + +class HostGraphParser: + """ + Parse graph metadata from text file + """ + def __init__(self, file_path): + self.buffer = deque(maxlen=100) + self.line_no = 0 + self._file_path = file_path + self.edges: List[Tuple[HostGraphNode, HostGraphNode]] = [] + self.nodes: Dict[str, HostGraphNode] = {} + self.graphs = self._parse(self._file_path) + self._get_node_dict() + self._get_edges_list() + del self.graphs[:1] # drop the first graph; slice deletion is a no-op when parsing produced none + + @staticmethod + def _get_key_value( line): + res = line.split(':', 1) + return res[0].strip(),
res[1].strip().strip('"') + + @staticmethod + def _parse_attr(key, value, obj): + if not isinstance(obj, list) and not obj: + return + if key == "dim" and hasattr(obj, "shape"): + obj.shape.append(value) + elif key == "name" and hasattr(obj, "op_name"): + obj.op_name = value + elif key == "name" and hasattr(obj, "name"): + obj.name = value + elif key == "dtype" and hasattr(obj, "dtype"): + obj.dtype = value + elif key == "layout" and hasattr(obj, "format"): + obj.format = value + elif key == "type" and hasattr(obj, "op_type"): + obj.op_type = value + elif key == "input" and hasattr(obj, "input"): + obj.inputs.append(value.strip('"').split(':')[0]) + elif key == "key" and hasattr(obj, "key"): + obj.key = value + elif hasattr(obj, key): + setattr(obj, key, value) + elif isinstance(obj, list) and key != "val_type": + obj.append(value) + + def _parse_struct(self, in_file, key, in_obj): + + def parse_shape(file, obj): + obj = self._parse_line(file, obj) + + def parse_input_desc(file, obj): + tensor = self._parse_line(file, Tensor()) + if obj and hasattr(obj, "input"): + obj.input.append(tensor) + + def parse_out_desc(file, obj): + tensor = self._parse_line(file, Tensor()) + if obj and hasattr(obj, "output"): + obj.output.append(tensor) + + def parse_op(file, obj: HostGraph): + node = self._parse_line(file, HostGraphNode()) + if hasattr(obj, "name"): + node.graph_name = obj.name + if obj and hasattr(obj, "nodes") and node.op_name: + obj.nodes[node.op_name] = node + + def parse_graph(file, obj): + graph = self._parse_line(file, HostGraph()) + obj.append(graph) + + def parse_attr(file, obj): + attr = self._parse_line(file, Attr()) + if hasattr(obj, attr.key): + if attr.key not in ['format']: + setattr(obj, attr.key, attr.value) + elif attr.key.endswith("_kernelname"): + setattr(obj, "kernelname", attr.value) + if obj and hasattr(obj, "get_attrs"): + obj.get_attrs().append(attr) + + def parse_list(file, obj): + value = [] + self._parse_line(file, value) + if isinstance(obj, 
list): + obj.append(value) + else: + obj = value + + def parse_value(file, obj): + if hasattr(obj, "value"): + obj.value = self._parse_line(file, obj.value) + + def parse_default(file, _obj=None): + """function with unused argument""" + self._parse_line(file, None) + + parse_methods = { + "shape": parse_shape, + "input_desc": parse_input_desc, + "output_desc": parse_out_desc, + "op": parse_op, + "graph": parse_graph, + "attr": parse_attr, + "list_list_int": parse_list, + "list_list_i": parse_list, + "list": parse_list, + "value": parse_value, + } + parse_methods.get(key, parse_default)(in_file, in_obj) + + def _read_line(self, file): + self.line_no += 1 + line = file.readline() + if line.strip().endswith('}'): + end_line = "" + while self.buffer and not end_line.strip().endswith("{"): + end_line = self.buffer.pop() + else: + self.buffer.append(line) + return line.strip() + + def _parse_line(self, file, obj=None): + line = self._read_line(file) + try: + while line and not line.endswith("}"): + if line.endswith('{'): + key = line.rstrip('{').strip() + self._parse_struct(file, key, obj) + else: + key, value = self._get_key_value(line) + self._parse_attr(key, value, obj) + line = self._read_line(file) + except Exception as exception: + if self.buffer: + logger.debug("***********************graph content**************************") + while self.buffer: + line = self.buffer.popleft() + logger.debug(line) + logger.debug("***********************graph content**************************") + raise exception + return obj + + def _parse(self, graph_file): + # pylint:disable=broad-except + graph_list = [] + with open(graph_file, "r", encoding="gbk") as file: + try: + graph_list = self._parse_line(file, graph_list) + except Exception: + logger.error( + "Parse line %s of file %s failed, make sure the format is correct.", self.line_no, graph_file + ) + graphs = [] + for graph in graph_list: + if isinstance(graph, HostGraph): + graphs.append(graph) + for graph in graphs: + 
graph.model_name = graphs[0].name + graph.file_path = self._file_path + graph.build() + return graphs + + def _get_edges_list(self) -> None: + if len(self.graphs) <= 0: + return + + def is_repeat_edge(edge, edge_collector): + for _edge in edge_collector: + if edge[0].op_name == _edge[0].op_name and edge[1].op_name == _edge[1].op_name: + return True + return False + + for node in self.nodes.values(): + for input_node_name in node.inputs: + if input_node_name not in self.nodes: + continue + input_node = self.nodes[input_node_name] + if not is_repeat_edge((input_node, node), self.edges): + self.edges.append((input_node, node)) + for output_node_name in node.outputs: + if output_node_name not in self.nodes: + continue + output_node = self.nodes[output_node_name] + if not is_repeat_edge((node, output_node), self.edges): + self.edges.append((node, output_node)) + + def _get_node_dict(self) -> None: + if not self.graphs: + self.nodes = {} + return + self.nodes = {node.op_name: node for graph in self.graphs for node in graph.nodes.values()} + + +class QueryGraphNode: + """ + Graph Node + """ + _ID = 0 + + def __init__(self, op_type: str, op_pass: str): + self._op_type = op_type + self._id = QueryGraphNode._ID + self._op_pass = op_pass + QueryGraphNode._ID += 1 + + def get_property(self, name): + """ + get property + """ + return getattr(self, name, lambda: None) + + @property + def op_type(self): + return self._op_type + + @property + def op_name(self): + return self._op_type + "_id_" + str(self._id) + + @property + def op_pass(self): + return self._op_pass + + @op_type.setter + def op_type(self, op_type): + self._op_type = op_type + + def __eq__(self, other): + return self._op_type == other._op_type and \ + self._id == other._id + + def __hash__(self): + return hash(self._op_type + str(self._id)) + + @staticmethod + def trim_string(string: str, length: int = -1): + """ + + Trim string to target length + :param string: Original string + :param length: Target length of 
string, -1 indicates original string. + :return: Trimmed string + """ + if string is None or not isinstance(string, str): + raise TypeError(f"Param string must be a string type but got {type(string)}.") + + if length <= -1 or len(string) <= length: + return string + + return string[:length] + + +class QueryGraphParser: + def __init__(self, rule_database_path: str): + self._fusion_rules: Dict[str, List[Tuple]] = dict() + self.load_database(rule_database_path) + self.num_rules = sum([len(v) for v in self._fusion_rules.values()]) + + @property + def fusion_rules(self): + return self._fusion_rules + + def load_database(self, rule_database): + if not os.path.isabs(rule_database): + rule_database = os.path.join(os.path.dirname(__file__), + "../", "../", + rule_database) + + if not os.path.exists(rule_database): + raise FileNotFoundError(f"Path {rule_database} does not exist.") + with open(rule_database, 'r') as f: + database = yaml.safe_load(f) + self.parse_yaml(database) + + def parse_yaml(self, yaml_database): + fusion_strategy_list = yaml_database.get("GraphFusion", []) + if yaml_database.get("UBFusion", []): + fusion_strategy_list.extend(yaml_database.get("UBFusion", [])) + for fusion_strategy in fusion_strategy_list: + if not isinstance(fusion_strategy, dict): + continue + (fusion_name, strategy), = fusion_strategy.items() + version = strategy.get("version", 0) + if version == 0 or version == "0": + self._fusion_rules[fusion_name] = self.build_query_graph_v0(fusion_name, + strategy.get('struct', [])) + elif version == 1 or version == "1": + self._fusion_rules[fusion_name] = self.build_query_graph_v1(fusion_name, + strategy.get('nodes', []), + strategy.get('edges', [])) + + @staticmethod + def build_query_graph_v0(graph_name: str, graph_struct: List[str]) -> List[Tuple]: + nodes = dict() + graphs = [] + edges = [] + + pre_node, next_node = None, None + for node in graph_struct: + pre_node = next_node + next_node = QueryGraphNode(node, graph_name) + 
nodes[next_node.op_name] = next_node + if pre_node is None or next_node is None: + continue + edges.append((pre_node, next_node,)) + graphs.append((nodes, edges, graph_name,)) + return graphs + + @staticmethod + def build_query_graph_v1(graph_name: str, + nodes_list: List[Dict], + edges_list: List[List[str]]) -> List[Tuple]: + graphs = [] + node_index = dict() + multi_node_list = [] + for index, node in enumerate(nodes_list): + (node_name, op_type), = node.items() + if isinstance(op_type, str): + op_type = [op_type] + multi_node_list.append([QueryGraphNode(op, graph_name) for op in op_type]) + node_index[node_name] = index + + multi_node = list(itertools.product(*multi_node_list)) + + for index, sub_nodes in enumerate(multi_node): + sub_graph_name = graph_name if index == 0 else f"{graph_name}#{index}" + sub_edge = [] + sub_node = dict() + for node in sub_nodes: + sub_node[node.op_name] = node + for edge in edges_list: + pre_node, next_node = edge + pre_node_index, next_node_index = node_index.get(pre_node), node_index.get(next_node) + sub_edge.append((sub_nodes[pre_node_index], sub_nodes[next_node_index])) + sub_graph = (sub_node, sub_edge, sub_graph_name,) + graphs.append(sub_graph) + return graphs diff --git a/profiler/advisor/dataset/__init__.py b/profiler/advisor/dataset/__init__.py index 9fac2c8eb3..e69de29bb2 100644 --- a/profiler/advisor/dataset/__init__.py +++ b/profiler/advisor/dataset/__init__.py @@ -1,6 +0,0 @@ -# import asight # noqa -# import asight.datasets.graph_dataset -# -# from .graph_dataset import GraphDataset as GraphD -# -# asight.datasets.graph_dataset.GraphDataset = GraphD diff --git a/profiler/advisor/dataset/graph_dataset.py b/profiler/advisor/dataset/graph_dataset.py new file mode 100644 index 0000000000..c6dd0448b4 --- /dev/null +++ b/profiler/advisor/dataset/graph_dataset.py @@ -0,0 +1,53 @@ +import logging +from typing import List + +from profiler.advisor.dataset.dataset import Dataset +from profiler.advisor.common.graph.graph_parser 
import HostGraphParser +from profiler.advisor.common.graph.graph import Graph +from profiler.advisor.utils.utils import load_parameter, lazy_property, get_file_path_from_directory + +logger = logging.getLogger() + +
+class GraphDataset(Dataset):
+    """
+    data directory dataset
+
+    Looks for host graph dump files under ``collection_path``, wraps the last match
+    in a ``HostGraphParser`` and exposes the result as ``Graph`` objects via ``graphs``.
+    """
+    # Parameter name passed to load_parameter to override the file suffix; "_Build.txt" is the fallback.
+    FILE_PATTERN = "ATT_ADVISOR_GRAPH_FILE"
+
+    def __init__(self, collection_path, data: dict = None, **kwargs) -> None:
+        """
+        collection_path: directory that is searched for graph files
+        data: forwarded unchanged to the base Dataset constructor
+        kwargs: accepted for signature compatibility, not forwarded
+        """
+        # Created before super().__init__ so the attribute exists if the base class
+        # parses during construction. NOTE(review): presumably Dataset.__init__ calls _parse() - confirm.
+        self.graph_files: List[HostGraphParser] = []
+        super().__init__(collection_path, data)
+
+    def _parse(self):
+        """
+        Select graph files by file-name suffix and parse the last one found.
+
+        return: self.graph_files, which stays empty when nothing was selected
+        """
+        graph_list = get_file_path_from_directory(self.collection_path,
+                                                  lambda file: file.endswith(
+                                                      load_parameter(self.FILE_PATTERN, "_Build.txt")))
+
+        # Only the last entry is used; the slice keeps the loop a no-op for an empty result.
+        for graph_file_path in graph_list[-1:]:
+            logger.info("Prepare to parse %s as default graph.", graph_file_path)
+            graph_file = HostGraphParser(graph_file_path)
+            self.graph_files.append(graph_file)
+        return self.graph_files
+
+    @lazy_property
+    def graphs(self) -> List[Graph]:
+        """
+        get a list of graphs
+        return: List[Graph]
+        """
+        graphs = []
+        for parser in self.graph_files:
+            graph = Graph(nodes=parser.nodes,
+                          edges=parser.edges,
+                          name="Default")
+            graph.build()
+            graphs.append(graph)
+        graphs.sort(key=lambda g: g.name)
+        # remove previous useless data; guarded because an unconditional del raised
+        # IndexError whenever no graph file had been parsed
+        if self.graph_files:
+            del self.graph_files[0]
+        return graphs
+
+    @property
+    def is_empty(self) -> bool:
+        """check empty graph dataset"""
+        # graphs is read as an attribute: it is wrapped by lazy_property and annotated as a list,
+        # so the former self.graphs() would have tried to call that list.
+        # NOTE(review): assumes lazy_property is a property-like descriptor - confirm in utils.
+        return len(self.graphs) == 0
diff --git a/profiler/advisor/display/html/templates/fusion.html b/profiler/advisor/display/html/templates/fusion.html new file mode 100644 index 0000000000..605a9d748f --- /dev/null +++ b/profiler/advisor/display/html/templates/fusion.html @@ -0,0 +1,47 @@ +{% if candidates|length > 0 %} +
+

Fusion Issues

+
+
+ {% for node in candidates %} +
{{node.op_pass|safe}}
+
+ + + + + + + + + + + +
StructureCountsElapsed Time(us)
{{ node.fusion_pattern|safe }}{{ node.counts|safe }}{{ node.total_duration|safe }}
+
+ {% for match in node.matches %} +
SubGraph {{ loop.index|safe }}
+
+ + + + + + + {% for node in match %} + + + + + + {% endfor %} +
OP NameOP TypeElapsed Time(us)
{{ node.op_name|safe }}{{ node.dtype|safe }}{{ node.duration|safe }}
+
+ {% endfor %} +
+
+ {% endfor %} +
+
+
+{% endif %} diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index ebe20baa2d..c9f0f150fc 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,10 +1,11 @@ -from collections import OrderedDict import os +from collections import OrderedDict +from profiler.advisor.utils.utils import Timer from profiler.advisor.analyzer.computation.profiling_analyzer import ProfilingAnalyzer from profiler.advisor.analyzer.schedule.fusion_ops.fusion_ops_analyzer import TimelineFusionOpsAnalyzer +from profiler.advisor.analyzer.graph_fusion.graph_fusion_analyzer import FusionOPAnalyzer from profiler.advisor.common.analyzer_scopes import SupportedScopes -from profiler.advisor.utils.utils import Timer from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer @@ -15,7 +16,8 @@ class Interface: SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer }), "computation": OrderedDict({ - SupportedScopes.PORFILING_OPERATOR_ANALYSIS: ProfilingAnalyzer + SupportedScopes.PORFILING_OPERATOR_ANALYSIS: ProfilingAnalyzer, + SupportedScopes.GRAPH: FusionOPAnalyzer }), "communication": OrderedDict(), "overall": OrderedDict(), diff --git a/profiler/advisor/rules/op_fusion_pass.yaml b/profiler/advisor/rules/op_fusion_pass.yaml new file mode 100644 index 0000000000..3ff69a5782 --- /dev/null +++ b/profiler/advisor/rules/op_fusion_pass.yaml @@ -0,0 +1,491 @@ +Elementwise: &Elementwise [ Relu, Pow, Add, Sub, Mul, Div, Abs, Ceil, Log, Sqrt, Exp, LeakyRelu ] + +GraphFusion: + - FlashAttentionFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ Mul ] + - node_3: [ Softmax, SoftmaxV2 ] + - node_4: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + + - 
FlashAttentionFusionPass_V2: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ Mul ] + - node_3: [ TransData ] + - node_4: [ Softmax, SoftmaxV2 ] + - node_5: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + - [ node_4, node_5 ] + + - BMMStridedSliceDGeluFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [StridedSliceD] + - node_3: [Relu] + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - BMMConfusionTransposeDFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ ConfusionTransposeD ] + - node_3: [ Relu ] + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - BMMConfusionTransposeDFusionPass_V2: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ ConfusionTransposeD ] + edges: + - [ node_1, node_2 ] + + - Conv2DAddGroupNormFusionPass: + version: 0 + struct: [ Conv2D, Add, GroupNorm ] + + - RMSnormAddFusionPass: + version: 0 + struct: [ RMSnorm, Add ] + + - ConvToFullyConnectionFusionPass: + version: 0 + struct: [ Conv ] + + - ZConcatv2dFusionPass: + version: 0 + struct: [ ConcatV2d, ConcatV2d ] + + - BatchMatMulReduceMeanFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMulV2, BatchMatMul, MatMul, MatMulV2 ] + - node_2: [ Add ] + - node_3: [ Relu ] + - node_4: [ ReduceMean ] + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + + - PadDepthwiseConv2dFusionPass: + version: 0 + struct: [ PadD, DepthwiseConv2D ] + + - ConvBatchnormFusionPass: + version: 1 + nodes: + - node_1: [ Conv2d, Conv3d, DepthwiseConv2d ] + - node_2: [ Batchnorm ] + + edges: + - [ node_1, node_2 ] + + - AConv2dMulFusion: + version: 1 + nodes: + - node_1: [ Conv2d, Conv3d ] + - node_2: [ Mul ] + + edges: + - [ node_1, node_2 ] + + - TBEConvAddFusion: + version: 1 + 
nodes: + - node_1: [ Conv2d, Conv3d ] + - node_2: [ Add ] + + edges: + - [ node_1, node_2 ] + + - ZBNupdateReluV2Conv2DBNreducePass: + version: 0 + struct: [ BNTrainingUpdate, ReluV2, Conv2D, BNTrainingReduce ] + + - ASplitConv2dConcatPass: + version: 1 + nodes: + - node_1: [ MatMul, MatMulV2, BatchMatMul, BatchMatMulV2 ] + - node_2: [ Cast ] + + edges: + - [ node_1, node_2 ] + + - MatMulBiasAddFusionPass: + version: 1 + nodes: + - node_1: [ MatMul, MatMulV2, BatchMatMul, BatchMatMulV2 ] + - node_2: [ BiasAdd, Add ] + + edges: + - [ node_1, node_2 ] + + - Conv2DbpInputBiasAddFusionPass: + version: 0 + struct: [ Conv2DBackpropInput, BiasAdd ] + + - BatchMatmulV2ReduceFusionPass: + version: 0 + struct: [ BatchMatMulV2, ReduceSumD ] + + - BatchMatmulV2ReduceFusionPass_V2: + version: 0 + struct: [ BatchMatMulV2, Cast, ReduceSumD ] + + - Conv3DbpInputBiasAddFusionPass: + version: 0 + struct: [ Conv3DBackpropInputD, BiasAdd ] + + - AFullyConnectionReshapePass: + version: 0 + struct: [ FullyConnection, Reshape ] + + - GemmTransFusionPass: + version: 0 + struct: [ Transpose, Gemm ] + + - Resnet50DbnDwFusionPass: + version: 0 + struct: [ BNTrainingReduceGrad, Conv2DBackpropFilterD ] + + - CastReluCastFusionPass: + version: 0 + struct: [ Cast, Relu, Cast ] + + - PadConv2dFusionPass: + version: 1 + nodes: + - node_1: [ PadD, PadDV3 ] + - node_2: [ Conv2D ] + + edges: + - [ node_1, node_2 ] + + - Conv2DTransposeBatchnormFusionPass: + version: 1 + nodes: + - node_1: [ Conv2dTranspose ] + - node_2: [ BatchNorm, BNInference ] + + edges: + - [ node_1, node_2 ] + + - AvgPoolV2GradFusionPass: + version: 0 + struct: [ AvgPoolV2Grad ] + + - DropOutDoMaskFusionPass: + version: 0 + struct: [ DropOutDoMaskV3D ] + + - ConvCastFusionPass: + version: 0 + struct: [ Conv2D, Cast ] + + - ConvCastFusionPass_V2: + version: 0 + struct: [ Conv2D, TransData, Cast ] + + - StridedSliceConcatFusionPass: + version: 1 + nodes: + - node_1: [ StridedSliceD ] + - node_2: [ StridedSliceD ] + - node_3: [
ConcatD ] + + edges: + - [ node_1, node_3 ] + - [ node_2, node_3 ] + + - ConvCastFusionPass: + version: 0 + struct: [ SplitV ] + + - AInplaceAddFusionPass: + version: 0 + struct: [ InplaceAdd ] + + - AInplaceSubFusionPass: + version: 0 + struct: [ InplaceSub ] + + - AInplaceUpdateFusionPass: + version: 0 + struct: [ InplaceUpdate ] + +UBFusion: + - TbeConv3dElemwisePass: + version: 1 + nodes: + - node_1: [ Conv3D ] + - node_2: *Elementwise + edges: + - [ node_1, node_2 ] + + - TbeConv3dDxElemwisePass: + version: 0 + struct: [ Conv3dBackpropInput, AddN, LeakyReluGrad ] + + - TbeConv3dDxElemwisePass_V2: + version: 0 + struct: [ Conv3dBackpropInput, LeakyReluGrad ] + + - MatMulDropoutDoMaskV3dFusionPass: + version: 0 + struct: [ MatMul, Dropout_do_mask_v3_d, Add ] + + - BatchMatMulDropoutDoMaskV3dFusionPass: + version: 0 + struct: [ BatchMatMul, Dropout_do_mask_v3_d, Add ] + + - MatmulReduceSumUbFusion: + version: 0 + struct: [ BatchMatMul, ReduceSum ] + + - TbeBatchMatMulElementWiseFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMul, GEMM ] + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - ATbeMatMulElemwiseFusionPass: + version: 1 + nodes: + - node_1: [ MatMul, GEMM ] + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - MatmulConfusiontransposeUbFusion: + version: 0 + struct: [ MatMul, matmul_transpose ] + + - TbeFullyconnectionElemwiseDequantFusionPass: + version: 1 + nodes: + - node_1: [ BatchMatMul, MatMul, FullyConnection ] + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - BatchMatmulConfusiontransposeUbFusion: + version: 0 + struct: [ BatchMatMul, batchmatmul_transpose ] + + - TbeConvSigmoidMulQuantFusionPass: + version: 1 + nodes: + - node_1: [ Conv ] + - node_2: [ Sigmoid ] + - node_3: [ Mul ] + - node_4: [ Quant ] + + edges: + - [ node_1, node_2 ] + - [ node_1, node_3 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + + - TbeConv2DReluv2Pass: + version: 0 + struct: [ Conv2D, ReluV2 ] + + - 
TbeConvDoubleInFusionPass: + version: 1 + nodes: + - node_1: [ Conv2D ] + - node_2: *Elementwise + - node_3: *Elementwise + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - TbeConv2dAddClipMulDivFusionPass: + version: 0 + struct: [ Conv2D, Add, Clip, Mul, Div ] + + - TbeConv2dAddClipMulDivFusionPass_V2: + version: 0 + struct: [ Conv2D, Add, Clip, Mul ] + + - TbeConv2dAddRelu6MulMulFusionPass: + version: 1 + nodes: + - node_1: [ Conv2D, DepthwiseConv2D ] + - node_2: [ Add ] + - node_3: [ Relu6 ] + - node_4: [ Mul ] + - node_5: [ Mul ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + - [ node_3, node_4 ] + - [ node_4, node_5 ] + + - ConvClipByValueFusionPass: + version: 1 + nodes: + - node_1: [ Conv2D ] + - node_2: *Elementwise + edges: + - [ node_1, node_2 ] + + - TbeAippConvReluMaxpoolingFusion: + version: 1 + nodes: + - node_1: [ Conv2D ] + - node_2: *Elementwise + - node_3: [ MaxPool, MaxPoolv3 ] + + edges: + - [ node_1, node_2 ] + - [ node_2, node_3 ] + + - TbeReduceElemwiseFusionPass: + version: 1 + nodes: + - node_1: *Elementwise + - node_2: [ CommReduce ] + edges: + - [ node_1, node_2 ] + + - TbeReadSelectEltwiseFusionPass: + version: 1 + nodes: + - node_1: [ ReadSelect ] + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - TbeEltwiseWriteSelectFusionPass: + version: 1 + nodes: + - node_1: *Elementwise + - node_2: [ write_select ] + + edges: + - [ node_1, node_2 ] + + - TbeEltwiseFusionPass: + version: 1 + nodes: + - node_1: *Elementwise + - node_2: *Elementwise + + edges: + - [ node_1, node_2 ] + + - TbeConvBnreduceFusionPass: + version: 0 + struct: [ Convolution, bn_reduce ] + + - TbeBnupdateEltwiseFusionPass: + version: 1 + nodes: + - node_1: [ bn_update ] + - node_2: *Elementwise + edges: + - [ node_1, node_2 ] + + - TbeConv2DBackpropElemwiseFusionPass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ Add, ReluGradV2 ] + + edges: + - [ node_1, node_2 ] + + - 
TbeDxElemwisePass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ LeakyRelu, Prelu ] + + edges: + - [ node_1, node_2 ] + + - TbeConv2dBackpropRequantFusionPass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ AscendRequant ] + + edges: + - [ node_1, node_2 ] + + - TbeDwTransdataFusionPass: + version: 1 + nodes: + - node_1: [ TransData ] + - node_2: [ TransData ] + - node_3: [ Conv2DBackpropFilter ] + + edges: + - [ node_1, node_3 ] + - [ node_2, node_3 ] + + - TbeDxTransdataFusionPass: + version: 1 + nodes: + - node_1: [ TransData ] + - node_2: [ TransData ] + - node_3: [ Conv2DBackpropInput ] + + edges: + - [ node_1, node_3 ] + - [ node_2, node_3 ] + + - TbeEltwiseCastFusionPass: + version: 1 + nodes: + - node_1: [ Relu, Add, Mul, Sqrt ] + - node_2: [ Cast ] + + edges: + - [ node_1, node_2 ] + + - TbeEltwiseCastFusionPass_V2: + version: 1 + nodes: + - node_1: [ Cast ] + - node_2: [ Relu, Add, Mul, Sqrt ] + + + edges: + - [ node_1, node_2 ] + + - TbeConv2DBackpropDequantFusionPass: + version: 1 + nodes: + - node_1: [ Conv2DBackpropInputD, Conv2DTransposeD, Deconvolution ] + - node_2: [ AscendDequant ] + + + edges: + - [ node_1, node_2 ] diff --git a/profiler/cli/__init__.py b/profiler/cli/__init__.py index 4666e2d531..2cba173de1 100644 --- a/profiler/cli/__init__.py +++ b/profiler/cli/__init__.py @@ -1,4 +1,4 @@ from profiler.advisor.config.config import Config from profiler.advisor.utils.utils import Timer -Config().set_log_path(f"ma_advisor_{Timer().strftime}.xlsx") +Config().set_log_path(f"att_advisor_{Timer().strftime}.xlsx") diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 1164a78cd1..8bc5a364ac 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -54,6 +54,6 @@ advisor_cli.add_command(compare_cli, name="compare") if __name__ == '__main__': advisor_cli.main( ["analyze", "all", "-d", -
r"C:/xxx/profiling_data", + r"C:\Users\admin\Downloads\llama-13B\llama-13b", ] ) diff --git a/profiler/cluster_analyse/common_func/db_manager.py b/profiler/cluster_analyse/common_func/db_manager.py index 039473d707..47dd30f0db 100644 --- a/profiler/cluster_analyse/common_func/db_manager.py +++ b/profiler/cluster_analyse/common_func/db_manager.py @@ -15,6 +15,9 @@ import os import sqlite3 +import sys + +sys.path.append("../../") from common_func.constant import Constant from common_func.empty_class import EmptyClass diff --git a/profiler/test/tools/tool.py b/profiler/test/tools/tool.py index ee4b6f9bb1..18699951fa 100644 --- a/profiler/test/tools/tool.py +++ b/profiler/test/tools/tool.py @@ -20,7 +20,7 @@ def recover_env(work_path="./"): if os.path.exists("./tune_ops_file.cfg"): os.remove("./tune_ops_file.cfg") - delete_file(r"ma_advisor_+", work_path) + delete_file(r"att_advisor_+", work_path) def run_command(cmd): -- Gitee From faa848f0f6be5980cb895935488b6a6a566032cc Mon Sep 17 00:00:00 2001 From: fanxiaotong Date: Fri, 17 May 2024 17:43:27 +0800 Subject: [PATCH 14/21] advisor --- .../computation/npu_fused/__init__.py | 14 -- .../npu_fused/compute_advice_base.py | 118 ---------- .../computation/npu_fused/csv_analyzer.py | 81 ------- .../computation/npu_fused/json_analyzer.py | 55 ----- .../computation/npu_fused/npu_fused_advice.py | 113 ---------- .../computation/npu_fused/npu_slow_advice.py | 124 ---------- .../analyzer/computation/npu_fused/op_perf.py | 193 ---------------- profiler/advisor/common/constant.py | 211 ------------------ profiler/advisor/common/trace_view_json.py | 209 ----------------- .../advisor/common/trace_view_preprocessor.py | 208 ----------------- profiler/advisor/interface/interface.py | 1 - 11 files changed, 1327 deletions(-) delete mode 100644 profiler/advisor/analyzer/computation/npu_fused/__init__.py delete mode 100644 profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py delete mode 100644 
profiler/advisor/analyzer/computation/npu_fused/csv_analyzer.py delete mode 100644 profiler/advisor/analyzer/computation/npu_fused/json_analyzer.py delete mode 100644 profiler/advisor/analyzer/computation/npu_fused/npu_fused_advice.py delete mode 100644 profiler/advisor/analyzer/computation/npu_fused/npu_slow_advice.py delete mode 100644 profiler/advisor/analyzer/computation/npu_fused/op_perf.py delete mode 100644 profiler/advisor/common/trace_view_json.py delete mode 100644 profiler/advisor/common/trace_view_preprocessor.py diff --git a/profiler/advisor/analyzer/computation/npu_fused/__init__.py b/profiler/advisor/analyzer/computation/npu_fused/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py b/profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py deleted file mode 100644 index 3916de201b..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/compute_advice_base.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import abstractmethod -from collections import defaultdict -import os - -from profiler.cluster_analyse.common_func.file_manager import FileManager -from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer -from profiler.advisor.common import constant as const - - -class ComputeAdviceBase(BaseAnalyzer): - ASCEND_PT = 'ascend_pt' - ASCEND_PROFILER_OUTPUT = 'ASCEND_PROFILER_OUTPUT' - KERNEL_DETAIL_FILE = "kernel_details.csv" - TRACE_VIEW_FILE = "trace_view.json" - - def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, - torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): - super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) - self.kernel_details_path = "" - self.has_preparse = False - self.preparse_data = defaultdict(list) - self.call_stack = None - self.trace_view_path = "" - - def path_check(self): - """ - check whether input path is valid - """ - if not os.path.exists(self.collection_path): - print("[ERROR] Path: {} is not exist.".format(self.collection_path)) - return False - if os.path.isdir(self.collection_path) and self.collection_path.endswith("ascend_pt"): - self.kernel_details_path = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT", - "kernel_details.csv") - if not os.path.exists(self.kernel_details_path): - print("[ERROR] kernel_details.csv is not exist in the Path: {}.".format( - os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT"))) - return False - elif os.path.isfile(self.collection_path) and 
os.path.basename(self.collection_path) == "kernel_details.csv": - self.kernel_details_path = self.collection_path - else: - print("[ERROR] Please input ascend_pt or kernel_details.csv") - return False - print("[INFO] Start to analyse the target file: {}".format(self.kernel_details_path)) - self.preparse() - return True - - def has_callstack(self): - if self.call_stack is not None: - return self.call_stack - profiler_info_json_path = "" - for file in os.listdir(self.collection_path): - if file.startswith("profiler_info"): - profiler_info_json_path = os.path.join(self.collection_path, file) - break - if not profiler_info_json_path: - self.call_stack = False - return self.call_stack - self.trace_view_path = os.path.join(self.collection_path, self.ASCEND_PROFILER_OUTPUT, "trace_view.json") - if not os.path.exists(profiler_info_json_path) or not os.path.exists(self.trace_view_path): - self.call_stack = False - return self.call_stack - info = FileManager.read_json_file(profiler_info_json_path) - if not info.get("config") or not info.get("config").get("common_config") \ - or not info.get("config").get("common_config").get("with_stack"): - self.call_stack = False - return self.call_stack - activities = info.get("config").get("common_config").get("activities") - if not activities or "ProfilerActivity.CPU" not in activities: - self.call_stack = False - return self.call_stack - self.call_stack = info.get("config").get("common_config").get("with_stack") - return self.call_stack - - @abstractmethod - def run(self): - """ - analyze profiling data and advice - """ - - def output(self): - """ - output relevant data - """ - self.output_format_data[self.DATA] = self.cur_data - self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck - self.output_format_data[self.ADVICE] = self.cur_advice - - def preparse(self): - if self.has_preparse: - return - - def optimize(self): - pass - - def make_record(self): - """ - make record for what and how to optimize - """ - pass - - def 
make_render(self): - pass diff --git a/profiler/advisor/analyzer/computation/npu_fused/csv_analyzer.py b/profiler/advisor/analyzer/computation/npu_fused/csv_analyzer.py deleted file mode 100644 index c85c14d618..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/csv_analyzer.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import multiprocessing - -import pandas as pd -import numpy as np - -from common_func_advisor.constant import Constant -from .op_perf import OpPerfFactory - - -class CSVAnalyzer: - def __init__(self, path) -> None: - self._path = path - - def process(self): - df = pd.read_csv(self._path, dtype={"Start Time(us)": str}) - # 分析是否存在可融合的算子 - op_type_list = df["Type"].tolist() - duration_list = df["Duration(us)"].tolist() - start_times = df["Start Time(us)"].tolist() - # 去除末尾的\t分隔符 - start_times = [start_time[:-1] for start_time in start_times] - result_list = [] - for pattern in Constant.PATTERN_DICT.keys(): - result_list.extend(self.find_all_sub_lists(op_type_list, duration_list, start_times, pattern)) - data_frame = pd.DataFrame(result_list) - data_frame.columns = ["pattern_name", "pattern", "len", "count", "duration sum(us)", "op durations(us)", - "index", "first_timestamp"] - return data_frame - - @staticmethod - def find_all_sub_lists(op_type_list, duration_list, start_times, expect_sub_list): - # 创建一个空字典,用来存储子列表和它们的出现次数和起始位置 
- len_sub_list = len(expect_sub_list) - expect_sub_list = tuple(expect_sub_list) - sublist_dict = {} - # 遍历列表,从每个位置开始,取长度为N的子列表 - for i in range(len(op_type_list) - len_sub_list + 1): - sublist = tuple(op_type_list[i:i + len_sub_list]) - if sublist != expect_sub_list: - continue - # 如果子列表已经在字典中,就增加它的出现次数,否则就初始化为1 - if sublist in sublist_dict: - # count - sublist_dict[sublist][0] += 1 - # index - sublist_dict[sublist][1].append(i) - # total duration - sublist_dict[sublist][2] += sum(duration_list[i:i + len_sub_list]) - # duration - zip_data = zip(sublist_dict[sublist][3], duration_list[i:i + len_sub_list]) - sublist_dict[sublist][3] = [a + b for a, b in zip_data] - else: - sublist_dict[sublist] = [1, [i], sum(duration_list[i:i + len_sub_list]), - duration_list[i:i + len_sub_list], len_sub_list, start_times[i]] - # 创建一个空列表,用来存储所有重复的子列表 - repeated_sublists = [] - for sublist, (count, index, duration_sum, op_durations, sublist_len, first_time) in sublist_dict.items(): - pattern_name = Constant.PATTERN_DICT.get(sublist, "unknown") - op_durations = [round(num, 2) for num in op_durations] - repeated_sublists.append([pattern_name, sublist, sublist_len, count, - duration_sum, op_durations, index, first_time]) - if len(sublist_dict) == 0: - pattern_name = Constant.PATTERN_DICT.get(expect_sub_list, "unknown") - repeated_sublists.append([pattern_name, expect_sub_list, 0, 0, 0, 0, 0, 0]) - # 返回所有重复的子列表 - return repeated_sublists diff --git a/profiler/advisor/analyzer/computation/npu_fused/json_analyzer.py b/profiler/advisor/analyzer/computation/npu_fused/json_analyzer.py deleted file mode 100644 index fd2a72ffa3..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/json_analyzer.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pandas as pd - -from common_func_advisor.trace_view_json import TraceViewJson - - -class JSONAnalyzer(object): - def __init__(self, path): - self._path = path - - def get_custom_code(self, data: pd.DataFrame, ts_col: str, output_col: str): - trace_json = TraceViewJson(self._path) - callstacks = pd.DataFrame(columns=[output_col]) - - for i, row in data.iterrows(): - if ts_col not in data.columns.tolist(): - print("[ERROR] No {} col found in data columns.".format(ts_col)) - return callstacks - timestamp = row[ts_col] - flow_event = trace_json.get_torch_2_npu_flow_event(timestamp) - if not flow_event.valid(): - print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) - callstacks.loc[i] = "" - continue - flow_event_s_key = flow_event.s_point_ts - python_dur_events = trace_json.get_python_dur_events_contain_ts(flow_event_s_key) - if not python_dur_events: - print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) - callstacks.loc[i] = "" - continue - # 保持新老版本callstack兼容性 - if python_dur_events[0].args.get("Call stack"): - # 旧版本 - callstack = python_dur_events[0].args.get("Call stack").split(";") - else: - python_dur_events.sort(key=lambda e: e.ts) - # 新版本 - callstack = [event.name for event in python_dur_events if event.cat == "python_function"] - callstack_str = "\n".join(callstack) - callstacks.loc[i] = callstack_str - return callstacks diff --git a/profiler/advisor/analyzer/computation/npu_fused/npu_fused_advice.py b/profiler/advisor/analyzer/computation/npu_fused/npu_fused_advice.py deleted file mode 
100644 index 7ec711d800..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/npu_fused_advice.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from abc import ABC - -import pandas as pd - -from profiler.advisor.analyzer.computation.npu_fused.compute_advice_base import ComputeAdviceBase -from profiler.advisor.analyzer.computation.npu_fused.csv_analyzer import CSVAnalyzer -from profiler.advisor.analyzer.computation.npu_fused.json_analyzer import JSONAnalyzer -from profiler.advisor.common import constant as const -from profiler.advisor.result.item import OptimizeItem, OptimizeRecord - - -class NpuFusedAdvice(ComputeAdviceBase, ABC, ): - NPU_FUSED_ADVICE = "npu_fused_advice" - - def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, - torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): - super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) - self.cur_data = dict() - self.cur_bottleneck = str() - self.cur_advice = str() - self.kernel_details_path = "" - self.call_stack = None - - def run(self): - if not self.path_check(): - return self.output_format_data - self.process() - self.output() - return self.output_format_data - - def process(self): - csv_analyzer = CSVAnalyzer(self.kernel_details_path) - all_pattern_data = csv_analyzer.process() - 
all_pattern_data = all_pattern_data.sort_values(by='duration sum(us)', ascending=False) - filter_data = all_pattern_data.get(all_pattern_data.get("duration sum(us)", 0) > 0) - if not self.has_callstack(): - print("[Warning] No call stack info found, advice will be incomplete") - self.cur_data = filter_data - else: - json_analyzer = JSONAnalyzer(self.trace_view_path) - custom_code = json_analyzer.get_custom_code(filter_data, "first_timestamp", "custom code") - self.cur_data = pd.concat([filter_data, custom_code], axis=1) - op_num = len(self.cur_data.index) - op_dur = filter_data["duration sum(us)"].sum() - if op_num > 0: - index = 0 - self.cur_bottleneck = f"The computing time of fusable op is {round(op_dur, 2)} ms." - self.cur_advice = "" - for _, row in self.cur_data.iterrows(): - advice = f"Advice {index}:\n" - cur_op = "[" + ", ".join(row.loc["pattern"]) + "]" - npu_fused_op = row.loc["pattern_name"] - advice += f"Replace {cur_op} with {npu_fused_op}. " - if self.call_stack: - advice += f"This pattern first happened in: \n{row['custom code']}" - if index != op_num - 1: - advice += "\n" - index += 1 - self.cur_advice += advice - - def optimize(self): - if not self.path_check(): - return self.output_format_data - self.process() - self.output() - return self.output_format_data - - def make_record(self): - """ - make record for what and how to optimize - """ - optimization_item = OptimizeItem( - NpuFusedAdvice.NPU_FUSED_ADVICE, - self.bottleneck_str, - self.cur_advices - ) - self.result.add(OptimizeRecord(optimization_item)) - - # self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - # for data_type, data in self.cur_data.items(): - # if data: - # self.result.add_detail(const.DATA + data_type, self.cur_data_table[data_type]["headers"], self.cur_data_table[data_type]["data"][0]) - - def make_render(self): - result_for_html = { - "Description" : self.cur_bottleneck, - "suggestion" : self.cur_advice, - "details" 
: [{}] - } - - self.html_render.render_template(key="cluster", - title=NpuFusedAdvice.NPU_FUSED_ADVICE, - template_dir="templates", - template_name="cluster_analysis.html", - cann_version=self.cann_version, - torch_version=self.torch_version, - result=result_for_html) diff --git a/profiler/advisor/analyzer/computation/npu_fused/npu_slow_advice.py b/profiler/advisor/analyzer/computation/npu_fused/npu_slow_advice.py deleted file mode 100644 index 0dd0a3225f..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/npu_slow_advice.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from abc import ABC -import multiprocessing - -import pandas as pd - -from profiler.advisor.analyzer.computation.npu_fused.compute_advice_base import ComputeAdviceBase -from profiler.advisor.analyzer.computation.npu_fused.op_perf import OpPerfFactory -from profiler.advisor.common import constant as const -from profiler.advisor.common.constant import PerfColor -from profiler.advisor.common.trace_view_json import TraceViewJson -from profiler.advisor.common.constant import Constant -from profiler.advisor.display.html.render import HTMLRender -from profiler.advisor.result.result import OptimizeResult -from profiler.advisor.result.item import OptimizeItem, OptimizeRecord - - -class NpuSlowAnalyzer(ComputeAdviceBase, ABC): - OP_PERF_SHEET = "op_perf" - npu_slow_advice = "NPU_SLOW_ADVICE" - - def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, - torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): - super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) - self.kernel_details_path = "" - self.data = pd.DataFrame() - - @staticmethod - def save_to_excel(data: pd.DataFrame, file_path: str) -> None: - writer = pd.ExcelWriter(file_path, engine="xlsxwriter", mode="w") - data.index.name = Constant.TITLE.INDEX - data.to_excel(writer, index=True, sheet_name=NpuSlowAnalyzer.OP_PERF_SHEET) - NpuSlowAnalyzer.color_sheet(data, writer.book, writer.sheets[NpuSlowAnalyzer.OP_PERF_SHEET]) - writer.sheets[NpuSlowAnalyzer.OP_PERF_SHEET].freeze_panes = "A2" - writer.close() - - @staticmethod - def color_sheet(data: pd.DataFrame, workbook, worksheet): - color_rgb = { - PerfColor.GREEN.name: workbook.add_format({'bg_color': '#C6EFCE'}), - PerfColor.YELLOW.name: workbook.add_format({'bg_color': '#FFEB9C'}), - PerfColor.RED.name: workbook.add_format({'bg_color': '#FFC7CE'}), - } - for row in data.iterrows(): - color = row[1][Constant.TITLE.COLOR] - fill_format = color_rgb.get(color) - if not fill_format: - 
continue - worksheet.set_row(row[0] + 1, None, fill_format) - - @staticmethod - def update_op_row(row: tuple): - return OpPerfFactory.build(row[1]).update() - - def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: - if not self.has_callstack(): - print("There is no call stack info, please set 'with_stack=True'") - return "" - trace_json = TraceViewJson(self.trace_view_path) - return trace_json.get_call_stack(data, index_id, ts_col) - - def run(self): - if not self.path_check(): - return self.data - self.process() - return self.data - - def process(self): - self.data = pd.read_csv(self.kernel_details_path, dtype={"Start Time(us)": str}) - # 去除末尾的\t分隔符 - self.data["Start Time(us)"] = self.data["Start Time(us)"].apply(lambda x: x[:-1]) - pool = multiprocessing.Pool(multiprocessing.cpu_count()) - result = pool.map(self.update_op_row, self.data.iterrows()) - pool.close() - self.data = pd.DataFrame(result) - - def optimize(self): - if not self.path_check(): - return self.data - self.process() - - def make_record(self): - """ - make record for what and how to optimize - """ - optimization_item = OptimizeItem( - NpuSlowAnalyzer.npu_slow_advice, - self.bottleneck_str, - self.cur_advices - ) - self.result.add(OptimizeRecord(optimization_item)) - - # self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - # for data_type, data in self.cur_data.items(): - # if data: - # self.result.add_detail(const.DATA + data_type, self.cur_data_table[data_type]["headers"], self.cur_data_table[data_type]["data"][0]) - - def make_render(self): - result_for_html = { - "Description" : self.cur_bottleneck, - "suggestion" : self.cur_advice, - "details" : [{}] - } - - self.html_render.render_template(key="cluster", - title=NpuSlowAnalyzer.npu_slow_advice, - template_dir="templates", - template_name="cluster_analysis.html", - cann_version=self.cann_version, - torch_version=self.torch_version, - result=result_for_html) 
diff --git a/profiler/advisor/analyzer/computation/npu_fused/op_perf.py b/profiler/advisor/analyzer/computation/npu_fused/op_perf.py deleted file mode 100644 index dbcaad8c1c..0000000000 --- a/profiler/advisor/analyzer/computation/npu_fused/op_perf.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -from typing import Dict -from profiler.advisor.common.constant import Constant, CoreType, PerfColor - - -class OpPerfFactory: - @classmethod - def build(cls, op_row: Dict): - if op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIV: - return VecOpPerf(op_row) - elif op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIC: - return CubeOpPerf(op_row) - else: - return OpPerf(op_row) - - -class OpPerf: - def __init__(self, op_row: Dict): - if "OP Type" in op_row.keys(): - Constant.update_title() - self.row = op_row - self.model_name = op_row.get("Model Name") - self.model_id = op_row.get("Model ID") - self.task_id = op_row.get("Task ID") - self.stream_id = op_row.get("Stream ID") - self.infer_id = op_row.get("Infer ID") - self.op_name = op_row.get("Name") - self.op_type = op_row.get("Type") - self.task_type = op_row.get("Accelerator Core") - self.task_start_time = op_row.get("Start Time(us)") - self.task_duration = op_row.get("Duration(us)") - self.task_wait_time = op_row.get("Wait Time(us)") - self.block_dim = op_row.get("Block Dim") - self.mix_block_dim 
= op_row.get("Mix Block Dim") - - self.hf32_eligible = op_row.get("HF32 Eligible") - self.input_shapes = op_row.get("Input Shapes") - self.input_data_types = op_row.get("Input Data Types") - self.input_formats = op_row.get("Input Formats") - self.output_shapes = op_row.get("Output Shapes") - self.output_data_types = op_row.get("Output Data Types") - self.output_formats = op_row.get("Output Formats") - self.context_id = op_row.get("Context ID") - self.aicore_time = op_row.get("aicore_time(us)") - self.aic_total_cycles = op_row.get("aic_total_cycles") - - self.aic_mac_time = op_row.get("aic_mac_time(us)") - self.aic_mac_ratio = op_row.get("aic_mac_ratio") - self.aic_scalar_time = op_row.get("aic_scalar_time(us)") - self.aic_scalar_ratio = op_row.get("aic_scalar_ratio") - self.aic_mte1_time = op_row.get("aic_mte1_time(us)") - self.aic_mte1_ratio = op_row.get("aic_mte1_ratio") - self.aic_mte2_time = op_row.get("aic_mte2_time(us)") - self.aic_mte2_ratio = op_row.get("aic_mte2_ratio") - self.aic_fixpipe_time = op_row.get("aic_fixpipe_time(us)") - self.aic_fixpipe_ratio = op_row.get("aic_fixpipe_ratio") - self.aic_icache_miss_rate = op_row.get("aic_icache_miss_rate") - self.aiv_time = op_row.get("aiv_time(us)") - self.aiv_total_cycles = op_row.get("aiv_total_cycles") - self.aiv_vec_time = op_row.get("aiv_vec_time(us)") - self.aiv_vec_ratio = op_row.get("aiv_vec_ratio") - self.aiv_scalar_time = op_row.get("aiv_scalar_time(us)") - self.aiv_scalar_ratio = op_row.get("aiv_scalar_ratio") - self.aiv_mte2_time = op_row.get("aiv_mte2_time(us)") - - self.aiv_mte2_ratio = op_row.get("aiv_mte2_ratio") - self.aiv_mte3_time = op_row.get("aiv_mte3_time(us)") - self.aiv_mte3_ratio = op_row.get("aiv_mte3_ratio") - self.aiv_icache_miss_rate = op_row.get("aiv_icache_miss_rate") - self.cube_utilization = op_row.get("cube_utilization( %)") - - @staticmethod - def get_dtype_size(dtype_str: str): - return Constant.DTYPE_SIZE_MAP.get(dtype_str.lower(), 0) - - @staticmethod - def 
get_element_count(shape: list): - return functools.reduce(lambda x, y: int(x) * int(y), shape) - - @staticmethod - def shape_to_tuple(shape_str: str) -> tuple: - if not isinstance(shape_str, str): - return [] - shape_str = shape_str.strip('"') - split_shape = shape_str.strip(';') - if not split_shape: - return [] - pairs = split_shape.split(';') - shape_result = [] - for pair in pairs: - pair = pair.strip(";") - elements = pair.split(',') - elements = tuple(int(element) if "" != element else 0 for element in elements) - shape_result.append(elements) - return tuple(shape_result) - - @staticmethod - def dtype_to_tuple(dtypes_str: str) -> tuple: - if not isinstance(dtypes_str, str): - return [] - dtypes_str = dtypes_str.strip('"') - split_dtypes = dtypes_str.strip(';') - if not split_dtypes: - return [] - pairs = split_dtypes.split(';') - return tuple(pairs) - - def get_mac_ratio(self): - return self.aic_mac_ratio - - def get_size(self, shapes_str, dtypes_str): - shapes = self.shape_to_tuple(shapes_str) - dtypes = self.dtype_to_tuple(dtypes_str) - if len(shapes) > len(dtypes): - print(f"[ERROR] The size of shape is greater than that of dtypes.") - return 0 - if len(shapes) < len(dtypes): - shapes = list(shapes) - shapes.extend([(1,)] * (len(dtypes) - len(shapes))) - all_size = 0 - for index, shape in enumerate(shapes): - element_count = self.get_element_count(shape) - dtype_size = self.get_dtype_size(dtypes[index]) - all_size += element_count * dtype_size - return all_size - - def get_calc_size(self): - # input and output bytes (MB) - if not self.input_shapes or not self.output_shapes: - print("[ERROR] There is no tensor data, do not assess vector op performance.") - return 0 - intput_size = self.get_size(self.input_shapes, self.input_data_types) - output_size = self.get_size(self.output_shapes, self.output_data_types) - return (intput_size + output_size) / (Constant.BYTE_UNIT_TRANS * Constant.BYTE_UNIT_TRANS) - - def get_throughput(self): - # throughput(GB/s) - if 
not self.task_duration or abs(self.task_duration) < 1e-6: - print("[ERROR] There is no task_duration, do not assess vector op performance.") - return 0 - return self.row[Constant.TITLE.SIZE] / Constant.BYTE_UNIT_TRANS / self.task_duration * Constant.UNIT_TRANS * Constant.UNIT_TRANS - - def get_perf_color(self): - return PerfColor.WHITE - - def update(self): - self.row[Constant.TITLE.SIZE] = self.get_calc_size() - self.row[Constant.TITLE.THROUGHPUT] = self.get_throughput() - self.row[Constant.TITLE.COLOR] = self.get_perf_color().name - return self.row - - -class VecOpPerf(OpPerf): - def get_perf_color(self) -> PerfColor: - throughput = self.row[Constant.TITLE.THROUGHPUT] - op_duration = self.task_duration - tp_threshold = Constant.TP_THRESHOLD - if throughput == 0: - return PerfColor.WHITE - if throughput < tp_threshold / 2 and op_duration > 20: - return PerfColor.RED - elif tp_threshold / 2 <= throughput < tp_threshold: - return PerfColor.YELLOW - else: - return PerfColor.GREEN - - -class CubeOpPerf(OpPerf): - def get_perf_color(self) -> PerfColor: - aic_mac_ratio = self.get_mac_ratio() - if not aic_mac_ratio: - print("[WARNING] There is no aic_mac_ratio, do not assess cube op performance.") - return PerfColor.WHITE - elif aic_mac_ratio < 0.6: - return PerfColor.RED - elif 0.6 <= aic_mac_ratio < 0.8: - return PerfColor.YELLOW - else: - return PerfColor.GREEN diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 1d23b1d9ac..90de9fe7b0 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -12,217 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from enum import Enum - - -class CsvTitle: - MODEL_NAME = "Model Name" - MODEL_ID = "Model ID" - TASK_ID = "Task ID" - STREAM_ID = "Stream ID" - INFER_ID = "Infer ID" - TASK_START_TIME = "Task Start Time(us)" - TASK_WAIT_TIME = "Task Wait Time(us)" - BLOCK_DIM = "Block Dim" - MIX_BLOCK_DIM = "Mix Block Dim" - HF32_ELIGIBLE = "HF32 Eligible" - INPUT_SHAPES = "Input Shapes" - INPUT_DATA_TYPES = "Input Data Types" - INPUT_FORMATS = "Input Formats" - OUTPUT_SHAPES = "Output Shapes" - OUTPUT_DATA_TYPES = "Output Data Types" - OUTPUT_FORMATS = "Output Formats" - CONTEXT_ID = "Context ID" - AICORE_TIME = "aicore_time(us)" - AIC_TOTAL_CYCLES = "aic_total_cycles" - AIC_MAC_TIME = "aic_mac_time(us)" - AIC_MAC_RATIO = "aic_mac_ratio" - AIC_SCALAR_TIME = "aic_scalar_time(us)" - AIC_SCALAR_RATIO = "aic_scalar_ratio" - AIC_MTE1_TIME = "aic_mte1_time(us)" - AIC_MTE1_RATIO = "aic_mte1_ratio" - AIC_MTE2_TIME = "aic_mte2_time(us)" - AIC_MTE2_RATIO = "aic_mte2_ratio" - AIC_FIXPIPE_TIME = "aic_fixpipe_time(us)" - AIC_FIXPIPE_RATIO = "aic_fixpipe_ratio" - AIC_ICACHE_MISS_RATE = "aic_icache_miss_rate" - AIV_TIME = "aiv_time(us)" - AIV_TOTAL_CYCLES = "aiv_total_cycles" - AIV_VEC_TIME = "aiv_vec_time(us)" - AIV_VEC_RATIO = "aiv_vec_ratio" - AIV_SCALAR_TIME = "aiv_scalar_time(us)" - AIV_SCALAR_RATIO = "aiv_scalar_ratio" - AIV_MTE2_TIME = "aiv_mte2_time(us)" - AIV_MTE2_RATIO = "aiv_mte2_ratio" - AIV_MTE3_TIME = "aiv_mte3_time(us)" - AIV_MTE3_RATIO = "aiv_mte3_ratio" - AIV_ICACHE_MISS_RATE = "aiv_icache_miss_rate" - CUBE_UTILIZATION = "cube_utilization( %)" - TASK_DURATION_SUM = "Task Duration Sum(us)" - TASK_DURATION_MEAN = "Task Duration Mean(us)" - TASK_DURATION_STD = "Task Duration Std(us)" - TASK_DURATION_RATIO = "Task Duration Ratio(100%)" - SIZE = "size(MB)" - THROUGHPUT = "throughput(GB/s)" - COLOR = "color" - GAP = "Gap(us)" - DURATION_SUM = "Duration Sum(us)" - COUNT = "Count" - MAX_DURATION = "Max Duration(us)" - MIN_DURATION = "Min Duration(us)" - AVG_DURATION = "Avg 
Duration(us)" - DURATION_RATIO = "Duration Ratio" - INDEX = "Index" - - -# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配旧版csv -class CsvTitleV1(CsvTitle): - OP_NAME = "Op Name" - OP_TYPE = "OP Type" - TASK_TYPE = "Task Type" - TASK_DURATION = "Task Duration(us)" - - -# 定义CSV_TITILE_V1类,继承自CSV_TITILE类, 适配新版csv -class CsvTitleV2(CsvTitle): - OP_NAME = "Name" - OP_TYPE = "Type" - TASK_TYPE = "Accelerator Core" - TASK_DURATION = "Duration(us)" - - -class Constant: - DTYPE_SIZE_MAP = {"int8": 1, "uint8": 1, - "int16": 2, "uint16": 2, - "int32": 4, "uint32": 4, - "int64": 8, "uint64": 8, - "float16": 2, - "bfloat16": 2, - "bf16": 2, - "dt_bf16": 2, - "float32": 4, - "float": 4, - "float64": 8, - "complex64": 8, - "complex128": 16, - "bool": 1} - TP_THRESHOLD = 1150 - MAX_INPUT_MODE_LEN = 30 - MAX_INPUT_ADVICE_LEN = 30 - SMALL_OP_DUR_RATIO = 0.2 - SMALL_OP_NUM_RATIO = 0.2 - BYTE_UNIT_TRANS = 1024 - UNIT_TRANS = 1000 - - # mode list - COMPUTE = "compute" - TIMELINE = "timeline" - CLUSTER = "cluster" - OVERALL = "overall" - PIPELINE = "pipeline" - - # advice list - SLOW_RANK = "slow rank" - SLOW_LINK = "slow link" - KERNEL = "kernel" - - # compute - NPU_FUSED = "npu_fused" - NPU_SLOW = "npu_slow" - - # timeline - OPTIM = "optimizer" - OP_SCHE = "op_schedule" - - # overall - SUMMARY = "summary" - - PT_PROF_SUFFIX = "ascend_pt" - ASCEND_PROFILER_OUTPUT = "ASCEND_PROFILER_OUTPUT" - COLLECTION_PATH = "collection_path" - CLUSTER_ANALYSIS_OUTPUT = "cluster_analysis_output" - KERNEL_DETAILS_CSV = "kernel_details.csv" - CLUSTER_STEP_TIME_CSV = "cluster_step_trace_time.csv" - CLUSTER_COMM_JSON = "cluster_communication.json" - - # pipline - OP_NAME = "name" - OP_TID = "tid" - PID = "pid" - TS = "ts" - DUR = "dur" - CAT = "cat" - ARGS = "args" - PH = "ph" - ID = "id" - PH_START = "s" - PH_BEGIN = "B" - PH_END = "E" - PH_META = "M" - PH_X = "X" - CNAME = "cname" - PROCESS_NAME = "process_name" - FRAMEWORK_NAME = "Python" - ASCEND_HARDWARE_NAME = "Ascend Hardware" - ASYNC_NPU = "async_npu" - 
STEP_PREFIX = "ProfilerStep#" - FP_ATEN_OP = "aten" - FP_C10D_OP = "c10d" - HCOM_OP_PREFIX = "hcom_" - BP_AUTOGRAD_OP = "autograd" - TRACE_VIEW_JSON = "trace_view.json" - - # pattern_dict key: pattern, value: pattern name - PATTERN_DICT = {("Add", "DropOutDoMask", "Add"): "bias_dropout_add", - ("BatchMatMul", "Mul", "Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast", "DropOutDoMask", - "AsStrided", "BatchMatMul", "Transpose"): "FA", - ("Transpose", "Transpose", "Transpose", "Mul", "Transpose", "BatchMatMulV2", "MaskedFill", - "Cast", "SoftmaxV2", "Cast", "DropOutDoMask", "BatchMatMulV2", "Transpose"): "FA", - ("Transpose", "BatchMatMulV2", "Transpose", "Transpose", "BatchMatMulV2", "ZerosLike", - "DropOutDoMask", "Cast", "SoftmaxGrad", "Cast", "MaskedFill", "BatchMatMulV2", - "BatchMatMulV2", "Mul"): "FA", - ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Cast", "Cast", "Mul", "Cast", "Cast", - "Mul", "Cast"): "RMSNORM", - ("Cast", "LayerNorm", "Cast"): "LayerNorm", - ("Add", "LayerNorm"): "AddLayerNorm", - ("Add", "LayerNormV3"): "AddLayerNorm", - ("Gelu", "Add"): "GeluAdd", - ("Cast", "Square", "MemSet", "ReduceMean", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "RMSNorm", - ("BatchMatMul", "RealDiv", "Add", "Maximum", "SoftmaxV2", "Cast", "BatchMatMul"): "FA", - ("BatchMatMulV2", "RealDiv", "Add", "Cast", "Maximum", "Cast", "SoftmaxV2", "AsStrided", - "BatchMatMulV2"): "FA", - ("BatchMatMulV2", "RealDiv", "Add", "Cast", "SoftmaxV2", "Cast", "BroadcastTo", - "BatchMatMulV2"): "FA", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Cast", "Mul", "Add"): "RotaryMul", - ("Mul", "AsStrided", "Neg", "AsStrided", "ConcatD", "Mul", "Add"): "RotaryMul", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul", "Add"): "RotaryMul", - ("MatMulV2", "Swish", "MatMulV2", "Mul", "MatMulV2"): "FFN", - ("Transpose", "Transpose", "GatherElement", "Transpose"): "GatherElement", - ("Slice", "Slice", "Swish", "Mul"): "torch_npu.npu_swiglu", - ("Cast", "Mul", "MaskedFill", "SoftmaxV2", "Cast"): 
"torch_npu.npu_scaled_masked_softmax", - ("Mul", "Slice", "Neg", "Slice", "ConcatD", "Mul"): "torch_npu.npu_rotary_mul", - ("Cast", "Square", "ReduceMeanD", "Add", "Rsqrt", "Mul", "Cast", "Mul"): "torch_npu.npu_rms_norm"} - TITLE = CsvTitleV2 - - @classmethod - def update_title(cls): - cls.TITLE = CsvTitleV1 - - -class CoreType: - AIV = "AI_VECTOR_CORE" - AIC = "AI_CORE" - AICPU = "AI_CPU" - MIX_AIV = "MIX_AIV" - MIX_AIC = "MIX_AIC" - HCCL = "HCCL" - - -class PerfColor(Enum): - WHITE = 0 - GREEN = 1 - YELLOW = 2 - RED = 3 # timeline DEQUEUE = "Dequeue" diff --git a/profiler/advisor/common/trace_view_json.py b/profiler/advisor/common/trace_view_json.py deleted file mode 100644 index 8171f06ee2..0000000000 --- a/profiler/advisor/common/trace_view_json.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2024, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from abc import abstractmethod -from dataclasses import dataclass -from dataclasses import field -from typing import Dict -from typing import List - -import pandas as pd - -from common_func.file_manager import FileManager - - -@dataclass -class TraceObj: - ph: str = "" - bp: str = "" - cat: str = "" - name: str = "" - pid: int = 0 - tid: int = 0 - id: int = 0 - ts: str = "" - dur: float = 0.0 - args: dict = field(default='unknown') - - @abstractmethod - def hash(self): - raise Exception("To be implemented") - - def valid(self): - return self.name != "" - - def check_hashable(self): - if not self.valid(): - raise Exception("Illegal {} to hash".format(self.__class__.name)) - - -@dataclass -class Process(TraceObj): - def hash(self): - self.check_hashable() - # msprof 保证name唯一性 - return self.args.get("name") - - -@dataclass -class Thread(TraceObj): - def hash(self): - self.check_hashable() - # msprof 保证name唯一性 - return self.args.get("name") - - -@dataclass -class DurationEvent(TraceObj): - def hash(self): - self.check_hashable() - return self.ts - - -@dataclass -class FlowEvent(TraceObj): - s_point_ts: str = "" - e_point_ts: str = "" - - def hash(self): - self.check_hashable() - return self.e_point_ts - - -class TraceViewJson: - - def __init__(self, path): - self.processes: Dict[str, Process] = dict() - self.threads: Dict[str, Thread] = dict() - self.python_dur_events: Dict[str, DurationEvent] = dict() - self.cann_dur_events: Dict[str, DurationEvent] = dict() - self.ascend_hardware_dur_events: Dict[str, DurationEvent] = dict() - self.torch_2_npu_flow_events: Dict[str, FlowEvent] = dict() - traces = FileManager.read_json_file(path) - self._load_obj(traces) - - def get_call_stack(self, data: pd.DataFrame, index_id: int, ts_col: str) -> str: - if ts_col not in data.columns.tolist(): - print("[ERROR] No {} col found in data columns.".format(ts_col)) - return "" - row = data.loc[index_id] - timestamp = row[ts_col] - flow_event = 
self.get_torch_2_npu_flow_event(timestamp) - if not flow_event.valid(): - print("[ERROR] Get flow event failed for pattern {}.".format(row['pattern'])) - return "" - flow_event_s_key = flow_event.s_point_ts - python_dur_events = self.get_python_dur_events_contain_ts(flow_event_s_key) - if not python_dur_events: - print("[ERROR] No python dur event found for pattern {}.".format(row['pattern'])) - return "" - # 保持新老版本callstack兼容性 - if python_dur_events[0].args.get("Call stack"): - # 旧版本 - call_stack_list = python_dur_events[0].args.get("Call stack").split(";") - else: - python_dur_events.sort(key=lambda e: e.ts) - # 新版本 - call_stack_list = [event.name for event in python_dur_events if event.cat == "python_function"] - call_stack = "\n".join(call_stack_list) - return call_stack - - def get_torch_2_npu_flow_event(self, end_time) -> FlowEvent: - if not self.torch_2_npu_flow_events or not self.torch_2_npu_flow_events.get(end_time): - print("[ERROR] Find flow event failed for ts: {}".format(end_time)) - return FlowEvent() - return self.torch_2_npu_flow_events.get(end_time) - - def get_python_dur_events_contain_ts(self, ts) -> List[DurationEvent]: - res = [] - for event in self.python_dur_events.values(): - if float(event.ts) <= float(ts) <= float(event.ts) + event.dur: - res.append(event) - return res - - def _load_obj(self, traces): - self._load_format(traces) - if not self._check_format(): - print("[ERROR] parse json failed for error format") - return - self._load_duration_events(traces) - self._load_torch_to_npu_flow_events(traces) - - def _check_format(self): - # 当前功能只需要这两个process,可扩展 - check_processes = ['Python', 'Ascend Hardware'] - for check_process in check_processes: - if check_process in self.processes: - continue - print("[ERROR] {} process not found in json.".format(check_process)) - return False - return True - - # 加载pid, tid头 - def _load_format(self, traces: List[Dict]): - for i, trace in enumerate(traces): - if trace.get('name') == 'process_name': - if not 
trace.get('args') or not trace.get('args').get('name') or not trace.get('pid'): - continue - process = Process(**trace) - self.processes[process.hash()] = process - if trace.get('name') == 'thread_name': - if not trace.get('args') or not trace.get('args').get('name') or not trace.get('tid'): - continue - thread = Thread(**trace) - self.threads[thread.hash()] = thread - - def _load_duration_events(self, traces: List[Dict]): - def check_events(_trace): - return _trace.get('name') and _trace.get("ts") and _trace.get("dur") - - python_pid = self.processes.get("Python").pid - cann_pid = self.processes.get("CANN").pid - ascend_hardware_pid = self.processes.get("Ascend Hardware").pid - for i, trace in enumerate(traces): - if trace.get('ph') != 'X': - continue - if not check_events(trace): - continue - event = DurationEvent(**trace) - if trace.get('pid') == python_pid: - self.python_dur_events[event.hash()] = event - elif trace.get('pid') == cann_pid: - self.cann_dur_events[event.hash()] = event - elif trace.get("pid") == ascend_hardware_pid: - self.ascend_hardware_dur_events[event.hash()] = event - - def _load_torch_to_npu_flow_events(self, traces: List[Dict]): - def check_events(_trace): - return _trace.get('name') and _trace.get("id") and _trace.get("ts") - - flow_events_table_by_id = dict() - - python_pid = self.processes.get("Python") - for i, trace in enumerate(traces): - if trace.get('ph') != 's' and trace.get('ph') != 'f' and trace.get('pid') != python_pid: - continue - if not check_events(trace): - continue - event = flow_events_table_by_id.get(trace.get("id")) - if not event: - event = FlowEvent(**trace) - if trace.get('ph') == 's': - event.s_point_ts = trace.get('ts') - else: - event.e_point_ts = trace.get('ts') - flow_events_table_by_id[event.id] = event - - self.torch_2_npu_flow_events = {eve.hash(): eve for eve in flow_events_table_by_id.values()} diff --git a/profiler/advisor/common/trace_view_preprocessor.py 
b/profiler/advisor/common/trace_view_preprocessor.py deleted file mode 100644 index 14a13066f6..0000000000 --- a/profiler/advisor/common/trace_view_preprocessor.py +++ /dev/null @@ -1,208 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import sys -from typing import Optional -from dataclasses import dataclass - -from profiler.advisor.common.constant import Constant - - -@dataclass -class FineTraceViewData: - py_pid: int = -1 - fp_tid: int = -1 - bp_tid: int = -1 - ascend_pid: int = -1 - min_ts: str = str(sys.maxsize) - max_ts: str = "0" - hcom_tids: list = None - fp_ops: list = None - bp_ops: list = None - hcom_ops: list = None - npu_ops_ts_dur: dict = None - torch_to_npu_links: list = None - - def __post_init__(self): - self.hcom_tids = self.hcom_tids or [] - self.fp_ops = self.fp_ops or [] - self.bp_ops = self.bp_ops or [] - self.hcom_ops = self.hcom_ops or [] - self.npu_ops_ts_dur = self.npu_ops_ts_dur or {} - self.torch_to_npu_links = self.torch_to_npu_links or [] - - def sort(self): - self.fp_ops.sort(key=lambda x: x[Constant.TS]) - self.bp_ops.sort(key=lambda x: x[Constant.TS]) - self.hcom_ops.sort(key=lambda x: x[Constant.TS]) - self.torch_to_npu_links.sort(key=lambda x: x[Constant.TS]) - - -class TraceViewPreProcessor: - """ - Trace view data preprocess - """ - - @staticmethod - def _is_fp_op(op_name: str) -> bool: - """ - check whether op is fp op - """ - return 
op_name.startswith(Constant.FP_ATEN_OP) or op_name.startswith(Constant.FP_C10D_OP) - - @staticmethod - def _is_fp_data(data: dict, fp_tid: int, py_pid: int) -> bool: - """ - check whether data is valid fp data - """ - return data[Constant.OP_TID] == fp_tid and \ - Constant.TS in data and Constant.DUR in data and \ - not data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) and \ - data[Constant.PID] == py_pid - - @staticmethod - def _is_bp_op(op_name: str) -> bool: - """ - check whether op is bp op - """ - return op_name.startswith(Constant.BP_AUTOGRAD_OP) - - @staticmethod - def _is_bp_data(data: dict, bp_tid: int, py_pid: int) -> bool: - """ - check whether data is valid bp data - """ - return data[Constant.OP_TID] == bp_tid and \ - Constant.TS in data and Constant.DUR in data and \ - data[Constant.PID] == py_pid - - @staticmethod - def _is_torch_to_npu_link(data: dict, fp_tid: int) -> bool: - """ - check whether data is torch to npu link - """ - return Constant.CAT in data and data[Constant.CAT] == Constant.ASYNC_NPU and \ - data[Constant.PH] == Constant.PH_START and \ - data[Constant.PID] == fp_tid - - @staticmethod - def _is_send_recv_op(op_name: str) -> bool: - """ - check whether op is hcom send or recv op - """ - # eg: hcom_BatchSendRecv__101_0_1 - p1 = re.compile(r'hcom_\w+SendRecv__\d+') - # eg: hcom_send__101_0_1 - p2 = re.compile(r'hcom_send__\d+') - # eg: hcom_receive__101_0_1 - p3 = re.compile(r'hcom_receive__\d+') - return bool(p1.match(op_name)) or bool(p2.match(op_name)) or bool(p3.match(op_name)) - - @staticmethod - def _is_hcom_op(op_name: str) -> bool: - """ - check whether data is hcom data - """ - return op_name.startswith(Constant.HCOM_OP_PREFIX) - - @staticmethod - def _is_python_process(data: dict) -> bool: - """ - check whether data is python process - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ - data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ - data[Constant.ARGS][Constant.OP_NAME] == 
Constant.FRAMEWORK_NAME - - @staticmethod - def _is_step_op(data: dict) -> bool: - """ - check whether data is step data - """ - return data[Constant.OP_NAME].startswith(Constant.STEP_PREFIX) - - @staticmethod - def _is_ascend_process(data: dict) -> bool: - """ - check whether data is ascend process data - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_META and \ - data[Constant.OP_NAME] == Constant.PROCESS_NAME and \ - data[Constant.ARGS][Constant.OP_NAME] == Constant.ASCEND_HARDWARE_NAME - - @staticmethod - def _is_npu_op(data: dict, ascend_pid: int) -> bool: - """ - check whether data is npu op - """ - return Constant.PH in data and data[Constant.PH] == Constant.PH_X and \ - not data[Constant.OP_NAME].isupper() and \ - data[Constant.PID] == ascend_pid - - def process(self, raw_data: list) -> Optional[FineTraceViewData]: - """ - preprocess raw data - """ - if not raw_data: - print("[ERROR] No raw data found in trace view data.") - return None - - raw_fp_tids, raw_bp_tids, raw_hcom_tids = set(), set(), set() - fine_data = FineTraceViewData() - - # counting fp ops and bp ops tid and ascend pid - for data in raw_data: - if self._is_fp_op(data[Constant.OP_NAME]): - raw_fp_tids.add(data[Constant.OP_TID]) - elif self._is_bp_op(data[Constant.OP_NAME]): - raw_bp_tids.add(data[Constant.OP_TID]) - elif self._is_send_recv_op(data[Constant.OP_NAME]): - fine_data.hcom_ops.append(data) - raw_hcom_tids.add(data[Constant.OP_TID]) - elif self._is_python_process(data): - fine_data.py_pid = data[Constant.PID] - elif self._is_ascend_process(data): - fine_data.ascend_pid = data[Constant.PID] - - # find max and min ts in hcom ops - if self._is_hcom_op(data[Constant.OP_NAME]): - # for compatibility with old data (ts is float type) - ts = data[Constant.TS] if not isinstance(data[Constant.TS], float) else str(data[Constant.TS]) - fine_data.min_ts = min(fine_data.min_ts, ts) - fine_data.max_ts = max(fine_data.max_ts, ts) - - unique_fp_tid = list(raw_fp_tids - 
raw_bp_tids) - unique_bp_tid = list(raw_bp_tids) - fine_data.hcom_tids = list(raw_hcom_tids) - - if not unique_fp_tid or not unique_bp_tid: - print("[INFO] No fp or bp tid found in trace view data.") - else: - fine_data.fp_tid, fine_data.bp_tid = unique_fp_tid[0], unique_bp_tid[0] - - # filter fp ops and bp ops and torch_to_npu_links - for data in raw_data: - if self._is_fp_data(data, fine_data.fp_tid, fine_data.py_pid): - fine_data.fp_ops.append(data) - elif self._is_bp_data(data, fine_data.bp_tid, fine_data.py_pid): - fine_data.bp_ops.append(data) - elif self._is_torch_to_npu_link(data, fine_data.fp_tid): - fine_data.torch_to_npu_links.append(data) - elif self._is_npu_op(data, fine_data.ascend_pid): - fine_data.npu_ops_ts_dur[data[Constant.TS]] = data[Constant.DUR] - - fine_data.sort() - return fine_data diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index aedbd2e116..3bb3d3f5a8 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -8,7 +8,6 @@ from profiler.advisor.utils.utils import Timer from profiler.advisor.analyzer.cluster.slow_rank_analyser import SlowRankAnalyzer from profiler.advisor.analyzer.cluster.slow_link_analyser import SlowLinkAnalyzer from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer -from profiler.advisor.analyzer.computation.npu_fused.npu_slow_advice import NpuSlowAnalyzer class Interface: supported_analyzer = { -- Gitee From 4e173466d9b1a0e8ab1e3915298cd224dba65041 Mon Sep 17 00:00:00 2001 From: fanxiaotong Date: Fri, 17 May 2024 17:46:38 +0800 Subject: [PATCH 15/21] advisor --- .../compute_advice/test_npu_slow_advice.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py index 894367d070..8830d49599 100644 --- 
a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py +++ b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py @@ -6,7 +6,7 @@ import csv import unittest from advisor_backend.interface import Interface -from advisor_backend.compute_advice.npu_slow_advice import +from advisor_backend.compute_advice.npu_slow_advice import NpuSlowAdvice class TestNpuSlowAdvice(unittest.TestCase): @@ -186,7 +186,7 @@ class TestNpuSlowAdvice(unittest.TestCase): self.create_kernel_details() interface = Interface(self.ASCEND_PT_DIR) data = interface.get_data('compute', 'npu_slow') - call_stack = (self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") + call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") self.assertEqual(9, len(data)) self.assertEqual("", call_stack) @@ -197,8 +197,8 @@ class TestNpuSlowAdvice(unittest.TestCase): interface = Interface(self.ASCEND_PT_DIR) data = interface.get_data('compute', 'npu_slow') slow_op_data = data[data["color"] == "RED"] - .save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) - call_stack = (self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") + NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) + call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") self.assertEqual(9, len(data)) self.assertEqual(2, len(slow_op_data)) print(call_stack) @@ -213,8 +213,8 @@ class TestNpuSlowAdvice(unittest.TestCase): interface = Interface(self.ASCEND_PT_DIR) data = interface.get_data('compute', 'npu_slow') slow_op_data = data[data["color"] == "RED"] - .save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) - call_stack = (self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") + NpuSlowAdvice.save_to_excel(data, 
file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx")) + call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)") self.assertEqual(9, len(data)) self.assertEqual(2, len(slow_op_data)) print(call_stack) -- Gitee From 690955af98bf5a127b20fd48df75fb3ca16e4007 Mon Sep 17 00:00:00 2001 From: renlei Date: Sat, 18 May 2024 11:01:09 +0800 Subject: [PATCH 16/21] aicpu rules support cann8.0.0 version --- .../computation/aicpu/aicpu_checker.py | 6 +-- .../op_compile/dynamic_shape_checker.py | 41 +++++++++++-------- .../analyzer/computation/operator_checker.py | 2 +- profiler/advisor/common/constant.py | 2 +- .../config/profiling_data_version_config.yaml | 6 +-- profiler/advisor/rules/aicpu_rules.yaml | 36 ++++++++-------- profiler/advisor/utils/utils.py | 2 + profiler/test/tools/tool.py | 8 ---- 8 files changed, 51 insertions(+), 52 deletions(-) diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py index 4654d97225..58dfa9bb08 100644 --- a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py +++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py @@ -42,7 +42,7 @@ class AicpuChecker(OperatorChecker): def load_aicpu_rules(self, rule_path="rules/aicpu_rules.yaml") -> Dict: if not os.path.isabs(rule_path): rule_path = os.path.join(os.path.dirname(__file__), - "../../computation/", "../", rule_path) + "../../../", rule_path) if not os.path.exists(rule_path): logger.warning("Skip analyze aicpu issues, because %s does not exist.", rule_path) @@ -64,8 +64,8 @@ class AicpuChecker(OperatorChecker): support_checkers = [] for checkers in aicpu_rules['CommonChecker']: for key, value in checkers.items(): - if key == 'DataTypeChecker' and value['cann_version'] != self.cann_verson: continue - support_checkers.append(checkers) + if key == 'DataTypeChecker' and self.cann_version in value['cann_version']: + support_checkers.append(checkers) 
aicpu_rules['CommonChecker'] = support_checkers return diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py index 4d405eb918..902f8b7f66 100644 --- a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py +++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py @@ -22,26 +22,35 @@ class DynamicShapeChecker(OperatorChecker): _op_views: List = [] def __init__(self, cann_version) -> None: - super().__init__(cann_version = cann_version) + super().__init__(cann_version=cann_version) def check(self, profiling_database) -> bool: + less_than_cann800_list = [constant.CANN_VERSION_C30, constant.CANN_VERSION_C13, constant.CANN_VERSION_C15] # CANN 8.0.0 之前从 ge_info 中获取 op_state 属性,进行动态 shape 逻辑判断 - if hasattr(profiling_database, "ge_info") and profiling_database.ge_info: - ge_info = profiling_database.ge_info - static_shape_operators = ge_info.get_static_shape_operators() - if len(static_shape_operators) == 0: - OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True - return True - # CANN 8.0.0 之后 op_state 属性从 op_summary 文件中获取 - elif hasattr(profiling_database, "op_summary"): - static_shape_operators = profiling_database.op_summary.get_static_shape_operators() - if len(static_shape_operators) == 0: - OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True - return True + if self.cann_version in less_than_cann800_list: + if hasattr(profiling_database, "ge_info") and profiling_database.ge_info.op_state_info_list is not None: + ge_info = profiling_database.ge_info + static_shape_operators = ge_info.get_static_shape_operators() + if len(static_shape_operators) == 0: + OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True + return True + else: + logger.warning( + "Skip dynamic shape checker because of not containing ge_info.db file in host filefloder.\n" + "To enable dynamic shape checker, please try to set data_simplification=False in 
experimental_config.\n" + "More details please refer to link : %s", constant.ASCEND_PROFILER_URL) else: - logger.warning("Skip dynamic shape checker because of not containing ge_info.db file in host filefloder.\n" - "To enable dynamic shape checker, please try to set data_simplification=False in experimental_config.\n" - "More details please refer to link : %s", constant.ASCEND_PROFILER_URL) + # CANN 8.0.0 之后 op_state 属性从 op_summary 文件中获取 + if hasattr(profiling_database, "op_summary") and len(profiling_database.op_summary.op_list) > 0: + static_shape_operators = profiling_database.op_summary.get_static_shape_operators() + if len(static_shape_operators) == 0: + OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True + return True + else: + logger.warning( + "Skip dynamic shape checker because of not containing op_summary.csv file in current filefloder." + ) + return False def make_record(self, profiling_database) -> OptimizeRecord: diff --git a/profiler/advisor/analyzer/computation/operator_checker.py b/profiler/advisor/analyzer/computation/operator_checker.py index e8490ff206..d714e0952c 100644 --- a/profiler/advisor/analyzer/computation/operator_checker.py +++ b/profiler/advisor/analyzer/computation/operator_checker.py @@ -38,7 +38,7 @@ class OperatorChecker(VersionControl): _tune_op_list: List[str] = [] def __init__(self, cann_version: str): - self.cann_verson = cann_version + self.cann_version = cann_version self._op_list: List[OpInfo] = [] def check(self, profiling_data: ProfilingDataset) -> bool: diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 664753c724..fee5ed9f81 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -32,7 +32,7 @@ NO_STACK_REASON_MAP = { TIMELINE_BACKWARD_NO_STACK_CODE: "Backward broadcast, without call stacks in profiling.", TIMELINE_ACL_TO_NPU_NO_STACK_CODE: "Incoming flow is 'acl_to_npu', without call stacks in profiling." 
} -TIMELINE_API_DOC_URL = "https://3ms.huawei.com/hi/group/3942456/wiki_7680982.html" +TIMELINE_API_DOC_URL = "https://support.huaweicloud.com/bestpractice-modelarts/modelarts_10_2516.html" AFFINITY_TRAINING_API = "Affinity training api" TIMELINE_WITH_STACK_DOC_URL = "https://www.hiascend.com/document/detail/zh/canncommercial/" \ "70RC1/modeldevpt/ptmigr/AImpug_0067.html" diff --git a/profiler/advisor/config/profiling_data_version_config.yaml b/profiler/advisor/config/profiling_data_version_config.yaml index 45f4b5c0f7..f73aecd3ba 100644 --- a/profiler/advisor/config/profiling_data_version_config.yaml +++ b/profiler/advisor/config/profiling_data_version_config.yaml @@ -72,9 +72,9 @@ versions: msprof: Msprof ge_info: GeInfo file_attr: - op_summary: ^op_summary_\d+_\d+_\.csv$ - task_time: ^task_time_\d+_\d+_\.json$ - msprof: ^msprof_\d+_\d+_\.json$ + op_summary: ^op_summary_\d+_\d+\.csv$ + task_time: ^task_time_\d+_\d+\.json$ + msprof: ^msprof_\d+_\d+\.json$ ge_info: ge_info.db diff --git a/profiler/advisor/rules/aicpu_rules.yaml b/profiler/advisor/rules/aicpu_rules.yaml index 053f4150e8..9313700c80 100644 --- a/profiler/advisor/rules/aicpu_rules.yaml +++ b/profiler/advisor/rules/aicpu_rules.yaml @@ -1,8 +1,9 @@ DataTypeSuggeation: &DataTypeSuggeation "Data type {} in {} operator may cause AICPU issues, Try to convert to {} if possible." 
+AICPU_DOC_URL: &AICPU_DOC_URL "https://support.huaweicloud.com/bestpractice-modelarts/modelarts_10_2517.html" CommonChecker: - DataTypeChecker: - cann_version: 7.0.RC1 + cann_version: [7.0.RC1] op_type: [ __ALL__ ] ignore_type: [ cast, tensorequal, equal, nonzero, mul ] input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, int16, uint16, dt_bf16 ] @@ -10,42 +11,42 @@ CommonChecker: suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.RC1 + cann_version: [7.0.RC1] op_type: [ cast ] input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.RC1 + cann_version: [7.0.RC1] op_type: [ tensorequal ] input: [ float, float32, float16, bool, int32, int8, uint8 ] output: [ bool ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.RC1 + cann_version: [7.0.RC1] op_type: [ equal ] input: [ float, float32, float16, bool, int32, int64, int8, uint8 ] output: [ bool ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.RC1 + cann_version: [7.0.RC1] op_type: [ nonzero ] input: [ float16, bool, dt_bf16 ] output: [ int64 ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.RC1 + cann_version: [7.0.RC1] op_type: [ mul ] input: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] output: [ float, float32, float16, bool, int32, uint32, int64, uint64, int8, uint8, dt_bf16 ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.0 + cann_version: [8.0.0, 7.0.0] op_type: [ __ALL__ ] ignore_type: [ cast, tensorequal, equal, nonzero, mul ] input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, int16, complex64, complex128 ] @@ -53,28 +54,28 @@ CommonChecker: suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 
7.0.0 + cann_version: [8.0.0, 7.0.0] op_type: [ cast ] input: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] output: [ float, float32, float16, bool, int32, uint32, int64, uint64, uint8, dt_bf16 ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.0 + cann_version: [8.0.0, 7.0.0] op_type: [ tensorequal ] input: [ float, float32, float16, dt_bf16, float64, bool, int32, int8, uint8 ] output: [ bool ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.0 + cann_version: [8.0.0, 7.0.0] op_type: [ equal ] input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8 ] output: [ bool ] suggestion: *DataTypeSuggeation - DataTypeChecker: - cann_version: 7.0.0 + cann_version: [8.0.0, 7.0.0] op_type: [ mul ] input: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] output: [ float, float32, float16, dt_bf16, float64, bool, int32, int64, int8, uint8, complex64 ] @@ -83,25 +84,20 @@ CommonChecker: ExampleGuideChecker: - IndexPutChecker: op_type: [index] - url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=Index" + url: *AICPU_DOC_URL suggestion: 'Please modify source code followed by this LINK, try to replace index operator with equivalent operator.' - NonzeroChecker: op_type: [ indexput, indexputv2 ] - url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=IndexPut" + url: *AICPU_DOC_URL suggestion: 'Please modify source code followed by this LINK, try to replace indexput operator with equivalent operator.' - CastChecker: op_type: [ argmin ] - url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=ArgMin" + url: *AICPU_DOC_URL suggestion: 'Please update your cann-tookit to at least 7.0.RC1 version by this LINK.' 
- - CastChecker: - op_type: [ unique ] - url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=unique" - suggestion: 'Please modify source code followed by this LINK, try to replace unique operator with equivalent operator.' - - CastChecker: op_type: [ nonzero ] - url: "https://wiki.huawei.com/domains/41510/wiki/76339/WIKI202311152358721?title=unique" + url: *AICPU_DOC_URL suggestion: 'Please modify source code followed by this LINK, try to replace nonzero operator with equivalent operator.' \ No newline at end of file diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index f338fc7dd4..638173c209 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -395,10 +395,12 @@ def join_prof_path(root_dir: str, sub_dir: str) -> str: for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): if re.match(sub_dir, os.path.basename(root)): return root + logger.error("Fail to get profiling path %s from local path %s by regular expression matching", sub_dir, root_dir) else: sub_dir = os.path.join(root_dir, sub_dir) if os.path.exists(sub_dir): return sub_dir + logger.error("Fail to get profiling path %s from local path %s", sub_dir, root_dir) return "" diff --git a/profiler/test/tools/tool.py b/profiler/test/tools/tool.py index ee4b6f9bb1..574c371270 100644 --- a/profiler/test/tools/tool.py +++ b/profiler/test/tools/tool.py @@ -28,11 +28,3 @@ def run_command(cmd): p = Popen(shlex.split(cmd, posix=False), stdout=PIPE, bufsize=0, universal_newlines=False) p.wait() - -def init_env(): - test_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../", "data", - "asight-0.3.52.dev0+ge3f3b41.d20231111-py3-none-any.whl")) - try: - import asight - except Exception: - run_command(f"pip install {test_dir}") -- Gitee From c25fdd529e2406bd228cc772f84246841abd5aa2 Mon Sep 17 00:00:00 2001 From: shpity Date: Mon, 20 May 2024 20:06:35 +0800 Subject: [PATCH 17/21] fix bug on analyze all without 
graph data --- .../computation/op_compile/dynamic_shape_checker.py | 4 ++-- profiler/advisor/analyzer/computation/operator_checker.py | 2 +- .../advisor/analyzer/graph_fusion/graph_fusion_analyzer.py | 2 ++ profiler/advisor/dataset/graph_dataset.py | 6 +++--- profiler/advisor/dataset/profiling/profiling_dataset.py | 7 +++++-- profiler/advisor/utils/utils.py | 4 ++-- profiler/cli/entrance.py | 2 +- 7 files changed, 16 insertions(+), 11 deletions(-) diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py index 7a3f718259..070b3a3b57 100644 --- a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py +++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py @@ -28,7 +28,7 @@ class DynamicShapeChecker(OperatorChecker): less_than_cann800_list = [constant.CANN_VERSION_C30, constant.CANN_VERSION_C13, constant.CANN_VERSION_C15] # CANN 8.0.0 之前从 ge_info 中获取 op_state 属性,进行动态 shape 逻辑判断 if self.cann_version in less_than_cann800_list: - if hasattr(profiling_database, "ge_info") and profiling_database.ge_info.op_state_info_list is not None: + if hasattr(profiling_database, "ge_info"): ge_info = profiling_database.ge_info static_shape_operators = ge_info.get_static_shape_operators() if len(static_shape_operators) == 0: @@ -41,7 +41,7 @@ class DynamicShapeChecker(OperatorChecker): "More details please refer to link : %s", constant.ASCEND_PROFILER_URL) else: # CANN 8.0.0 之后 op_state 属性从 op_summary 文件中获取 - if hasattr(profiling_database, "op_summary") and len(profiling_database.op_summary.op_list) > 0: + if hasattr(profiling_database, "op_summary"): static_shape_operators = profiling_database.op_summary.get_static_shape_operators() if len(static_shape_operators) == 0: OperatorChecker.IS_ALL_OPERATOR_DYNAMIC_SHAPE = True diff --git a/profiler/advisor/analyzer/computation/operator_checker.py 
b/profiler/advisor/analyzer/computation/operator_checker.py index d714e0952c..6bb837004b 100644 --- a/profiler/advisor/analyzer/computation/operator_checker.py +++ b/profiler/advisor/analyzer/computation/operator_checker.py @@ -243,7 +243,7 @@ class OperatorChecker(VersionControl): return op_type_list def _check_summary(self, data: ProfilingDataset): - if not data.op_summary: + if not hasattr(data, "op_summary"): logger.warning(self.SKIP_CHECK_MSG, self._CHECKER, "op summary") return False return True diff --git a/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py index 059950089d..713e118429 100644 --- a/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py +++ b/profiler/advisor/analyzer/graph_fusion/graph_fusion_analyzer.py @@ -31,6 +31,8 @@ class FusionOPAnalyzer(BaseAnalyzer): def _check(self, graph_data: List[GraphDataset], profiling_data: List[ProfilingDataset] = None) -> None: + if len(graph_data) == 0 or graph_data[0].is_empty(): + return for _, rule in self.RULES.items(): checker = rule() if profiling_data is None: diff --git a/profiler/advisor/dataset/graph_dataset.py b/profiler/advisor/dataset/graph_dataset.py index c6dd0448b4..951de7fd26 100644 --- a/profiler/advisor/dataset/graph_dataset.py +++ b/profiler/advisor/dataset/graph_dataset.py @@ -44,10 +44,10 @@ class GraphDataset(Dataset): graph.build() graphs.append(graph) graphs.sort(key=lambda g: g.name) - del self.graph_files[0] # remove previous useless data + if len(self.graph_files) >= 1: + del self.graph_files[0] # remove previous useless data return graphs - @property def is_empty(self) -> bool: """check empty graph dataset""" - return len(self.graphs()) == 0 + return len(self.graph_files) == 0 diff --git a/profiler/advisor/dataset/profiling/profiling_dataset.py b/profiler/advisor/dataset/profiling/profiling_dataset.py index 836f30905f..46d4a4fe8b 100644 --- 
a/profiler/advisor/dataset/profiling/profiling_dataset.py +++ b/profiler/advisor/dataset/profiling/profiling_dataset.py @@ -45,8 +45,11 @@ class ProfilingDataset(Dataset): data_class = globals()[self.current_version_pattern.get('class_attr').get(item)] data_class.FILE_PATTERN = self.current_version_pattern.get('file_attr').get(item) data_object = data_class(current_path) - data_object.parse_data() - setattr(self, item, data_object) + is_success = data_object.parse_data() + if is_success: + setattr(self, item, data_object) + else: + logger.warning("Skip parse %s from local path %s", self.current_version_pattern.get('class_attr').get(item), current_path) else: logger.warning(f"Unsupported arguments : %s to build %s", dirs_pattern, self.__class__.__name__) diff --git a/profiler/advisor/utils/utils.py b/profiler/advisor/utils/utils.py index 638173c209..84419b6708 100644 --- a/profiler/advisor/utils/utils.py +++ b/profiler/advisor/utils/utils.py @@ -395,12 +395,12 @@ def join_prof_path(root_dir: str, sub_dir: str) -> str: for root, _, _ in os.walk(root_dir, onerror=walk_error_handler): if re.match(sub_dir, os.path.basename(root)): return root - logger.error("Fail to get profiling path %s from local path %s by regular expression matching", sub_dir, root_dir) + logger.debug("Fail to get profiling path %s from local path %s by regular expression matching", sub_dir, root_dir) else: sub_dir = os.path.join(root_dir, sub_dir) if os.path.exists(sub_dir): return sub_dir - logger.error("Fail to get profiling path %s from local path %s", sub_dir, root_dir) + logger.debug("Fail to get profiling path %s from local path %s", sub_dir, root_dir) return "" diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 8bc5a364ac..031fa76d6a 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -54,6 +54,6 @@ advisor_cli.add_command(compare_cli, name="compare") if __name__ == '__main__': advisor_cli.main( ["analyze", "all", "-d", - 
r"C:\Users\admin\Downloads\llama-13B\llama-13b", + "C:\xxx\profiling_data" ] ) -- Gitee From 27614fe465a34ff1a1bc9aa693b837344094f5bd Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Tue, 21 May 2024 17:38:17 +0800 Subject: [PATCH 18/21] =?UTF-8?q?=E5=B1=95=E7=A4=BAupdate=E5=91=BD?= =?UTF-8?q?=E4=BB=A4,=E7=94=A8=E4=BA=8E=E8=B7=9F=E6=96=B0advisor=E8=A7=84?= =?UTF-8?q?=E5=88=99=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- MANIFEST.in | 1 + .../analyzer/cluster/__init__.py} | 0 profiler/advisor/common/constant.py | 4 +- profiler/advisor/dataset/cluster/__init__.py | 0 profiler/cli/analyze_cli.py | 41 +------------------ profiler/cli/cluster_cli.py | 27 ++++++++++++ profiler/cli/compare_cli.py | 13 ++---- profiler/cli/entrance.py | 20 ++++----- profiler/cli/update_cli.py | 40 ------------------ 9 files changed, 41 insertions(+), 105 deletions(-) rename profiler/{cli/query_cli.py => advisor/analyzer/cluster/__init__.py} (100%) create mode 100644 profiler/advisor/dataset/cluster/__init__.py delete mode 100644 profiler/cli/update_cli.py diff --git a/MANIFEST.in b/MANIFEST.in index cfadbde1db..6068a6df08 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include profiler/ * +recursive-include profiler/advisor/display/html/templates * global-exclude */__pycache__/* global-exclude *.pyc diff --git a/profiler/cli/query_cli.py b/profiler/advisor/analyzer/cluster/__init__.py similarity index 100% rename from profiler/cli/query_cli.py rename to profiler/advisor/analyzer/cluster/__init__.py diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py index 6b10b5c55a..4c3fc42ff3 100644 --- a/profiler/advisor/common/constant.py +++ b/profiler/advisor/common/constant.py @@ -113,9 +113,9 @@ COMMON_ENDPOINT_SUFFIX = "obs.{}.myhuaweicloud.com" INNER_ENDPOINT_SUFFIX= "obs.{}.ulanqab.huawei.com" AICPU_RULES_YAML_NAME = "aicpu_rules.yaml" -FUSSION_PASS_YAML_NAME = "op_fussion_pass.yaml" 
+FUSION_PASS_YAML_NAME = "op_fusion_pass.yaml" TIMELINE_FUSION_OPS_YAML_NAME = "timeline_fusion_ops.yaml" -CLOUD_YAML_NAME_LIST = [AICPU_RULES_YAML_NAME, FUSSION_PASS_YAML_NAME, TIMELINE_FUSION_OPS_YAML_NAME] +CLOUD_YAML_NAME_LIST = [AICPU_RULES_YAML_NAME, FUSION_PASS_YAML_NAME, TIMELINE_FUSION_OPS_YAML_NAME] MAX_RETRIES = 3 TIMEOUT = 3 diff --git a/profiler/advisor/dataset/cluster/__init__.py b/profiler/advisor/dataset/cluster/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/profiler/cli/analyze_cli.py b/profiler/cli/analyze_cli.py index 86af711564..8e05610064 100644 --- a/profiler/cli/analyze_cli.py +++ b/profiler/cli/analyze_cli.py @@ -85,25 +85,6 @@ def analyze_all(**kwargs) -> None: _analyze(Interface.all_dimension, **kwargs) -@analyze_cli.command(context_settings=CONTEXT_SETTINGS, - name="communication", - short_help='Analyze timeline, operators and graph.') -@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='Directory of profiling data') -@click.option('--cann_version', '-cv', 'cann_version', - type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), - default=constant.DEFAULT_CANN_VERSION, - help='The CANN software version, which can be viewed by executing the following command: ' - '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') -@click.option('--torch_version', '-tv', 'torch_version', - type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), - default=constant.DEFAULT_TORCH_VERSION, - help='The runtime torch version, which can be detected by exec command "pip show torch"') -@debug_option -def analyze_communication(**kwargs) -> None: - _analyze(["communication"], **kwargs) - - @analyze_cli.command(context_settings=CONTEXT_SETTINGS, name="schedule", short_help='Analyze timeline, operators and graph.') @@ -146,24 +127,4 @@ def analyze_schedule(**kwargs) -> None: help="enter the profiling type, selectable 
range ascend_pytorch_profiler, mslite ,msprof") @debug_option def analyze_computation(**kwargs) -> None: - _analyze(["computation"], **kwargs) - - -@analyze_cli.command(context_settings=CONTEXT_SETTINGS, - name="dataloader", - short_help='Analyze timeline, operators and graph.') -@click.option('--profiling_path', '-d', 'profiling_path', type=click.Path(), required=True, - help='Directory of profiling data') -@click.option('--cann_version', '-cv', 'cann_version', - type=click.Choice(constant.SUPPORTED_CANN_VERSION, case_sensitive=False), - default=constant.DEFAULT_CANN_VERSION, - help='The CANN software version, which can be viewed by executing the following command: ' - '"cat /usr/local/Ascend/ascend-toolkit/latest/aarch64-linux/ascend_toolkit_install.info"') -@click.option('--torch_version', '-tv', 'torch_version', - type=click.Choice(constant.SUPPORTED_TORCH_VERSION, case_sensitive=False), - default=constant.DEFAULT_TORCH_VERSION, - help='The runtime torch version, which can be detected by exec command "pip show torch"') -@click.option('--is_inference', is_flag=True, help="Enable performance analysis of inference task") -@debug_option -def analyze_dataloader(**kwargs) -> None: - _analyze(["dataloader"], **kwargs) + _analyze(["computation"], **kwargs) \ No newline at end of file diff --git a/profiler/cli/cluster_cli.py b/profiler/cli/cluster_cli.py index e69de29bb2..9ce8e45058 100644 --- a/profiler/cli/cluster_cli.py +++ b/profiler/cli/cluster_cli.py @@ -0,0 +1,27 @@ +import ast +import click +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(__file__))) + +from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup +from profiler.advisor.utils.utils import debug_option +from profiler.cluster_analyse.common_func.constant import Constant +from profiler.cluster_analyse.cluster_analysis import ClusterAnalysis + + +@click.command(context_settings=CONTEXT_SETTINGS, + name="cluster", + short_help='The analysis of cluster profiling 
data') +@click.option('--profiling_path', '-d', 'base_profiling_path', type=click.Path(), required=True, + help='profiling data path') +@click.option('--mode', '-m', 'mode', type=click.Choice(['all', 'communication_time', 'communication_matrix']), + default='all', help="different analysis mode") +@debug_option +def cluster_cli(**kwargs) -> None: + parameter = { + Constant.COLLECTION_PATH: kwargs.get("base_profiling_path"), + Constant.ANALYSIS_MODE: kwargs.get("mode") + } + ClusterAnalysis(parameter).run() diff --git a/profiler/cli/compare_cli.py b/profiler/cli/compare_cli.py index a4e69653f2..2bce1f82c4 100644 --- a/profiler/cli/compare_cli.py +++ b/profiler/cli/compare_cli.py @@ -10,15 +10,8 @@ from profiler.advisor.utils.utils import debug_option from profiler.advisor.common.timeline.event import AdvisorDict from profiler.compare_tools.compare_backend.comparison_generator import ComparisonGenerator - -@click.group(name="compare", cls=ClickAliasedGroup) -def compare_cli(**kwargs): - """Query operator details from timeline.""" - pass - - -@compare_cli.command(context_settings=CONTEXT_SETTINGS, - name="profiling", +@click.command(context_settings=CONTEXT_SETTINGS, + name="compare", short_help='Analyze timeline for specific operator and report detail code stacks.') @click.option('--profiling_path', '-d', 'base_profiling_path', type=click.Path(), required=True, help='path of trace_view.json in profiling') @@ -33,6 +26,6 @@ def compare_cli(**kwargs): @click.option('--use_input_shape', is_flag=True) @click.option('--gpu_flow_cat', type=str, default='', help="gpu flow event的分类标识") @debug_option -def compare_profiling(**kwargs) -> None: +def compare_cli(**kwargs) -> None: args = AdvisorDict(kwargs) ComparisonGenerator(args).run() diff --git a/profiler/cli/entrance.py b/profiler/cli/entrance.py index 031fa76d6a..ef0815ce11 100644 --- a/profiler/cli/entrance.py +++ b/profiler/cli/entrance.py @@ -6,7 +6,7 @@ import click from profiler.cli.analyze_cli import analyze_cli from 
profiler.cli.complete_cli import auto_complete_cli from profiler.cli.compare_cli import compare_cli - +from profiler.cli.cluster_cli import cluster_cli from profiler.advisor.version import print_version_callback, cli_version logger = logging.getLogger() @@ -14,9 +14,9 @@ CONTEXT_SETTINGS = dict(help_option_names=['-H', '-h', '--help'], max_content_width=160) COMMAND_PRIORITY = { - "analyze": 1, - "query": 2, - "env": 3, + "advisor": 1, + "compare": 2, + "cluster": 3, "auto-completion": 4 } @@ -47,13 +47,7 @@ def advisor_cli(**kwargs): pass -advisor_cli.add_command(analyze_cli, name="analyze") -advisor_cli.add_command(auto_complete_cli, name="auto-completion") +advisor_cli.add_command(analyze_cli, name="advisor") advisor_cli.add_command(compare_cli, name="compare") - -if __name__ == '__main__': - advisor_cli.main( - ["analyze", "all", "-d", - "C:\xxx\profiling_data" - ] - ) +advisor_cli.add_command(cluster_cli, name="cluster") +advisor_cli.add_command(auto_complete_cli, name="auto-completion") \ No newline at end of file diff --git a/profiler/cli/update_cli.py b/profiler/cli/update_cli.py deleted file mode 100644 index 9407981ae0..0000000000 --- a/profiler/cli/update_cli.py +++ /dev/null @@ -1,40 +0,0 @@ -from urllib import parse - -import click - -from profiler.advisor.common import constant -from profiler.advisor.config.config import Config -from profiler.advisor.utils.tools import CONTEXT_SETTINGS, ClickAliasedGroup -from profiler.advisor.utils.utils import debug_option, request_with_retry - - -@click.group(name="update", cls=ClickAliasedGroup) -def update_cli(**kwargs): - """Update operation command, such as update rule and specify save path.""" - pass - - -@update_cli.command(context_settings=CONTEXT_SETTINGS, - name="rule", - short_help='Update the ma-advisor rules on the terminal. The default save path is ' - '"~/rules/cloud/". 
If user want to specify the save path, please use the environment ' - 'variable "ADVISOR_RULE_PATH"') -@click.option('--region', '-r', type=click.Choice(constant.CLOUD_RULE_REGION_LIST), required=True, - default=constant.DEFAULT_CLOUD_RULE_REGION, - help='Specifies the region where the rule file is downloaded.') -@debug_option -def update_rule(**kwargs) -> None: - """ - Download the latest rule yaml file. - """ - region_name = kwargs.get("region") - rule_bucket = Config().config.get(constant.RULE_BUCKET, region_name) - rule_endpoint_suffix = constant.COMMON_ENDPOINT_SUFFIX.format(region_name) - if region_name in constant.INNER_REGION_LIST: - rule_endpoint_suffix = constant.INNER_ENDPOINT_SUFFIX.format(region_name) - - obs_url = constant.HTTPS_PREFIXES + rule_bucket + "." + rule_endpoint_suffix - obs_url = parse.urljoin(obs_url, constant.COMMON_YAML_DIR) - for file_name in constant.CLOUD_YAML_NAME_LIST: - url = parse.urljoin(obs_url, file_name) - request_with_retry(url, region_name) -- Gitee From ab4c5a4e1b3be7d906b80b7e9e76500ff1370c60 Mon Sep 17 00:00:00 2001 From: wuyuhan Date: Wed, 22 May 2024 11:11:51 +0800 Subject: [PATCH 19/21] interface bug fix --- profiler/advisor/__init__.py | 1 + profiler/advisor/common/analyzer_scopes.py | 2 +- profiler/advisor/interface/interface.py | 8 ++++++-- profiler/advisor/result/result.py | 3 ++- profiler/cli/complete_cli.py | 6 +++--- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/profiler/advisor/__init__.py b/profiler/advisor/__init__.py index e69de29bb2..cea5ae3406 100644 --- a/profiler/advisor/__init__.py +++ b/profiler/advisor/__init__.py @@ -0,0 +1 @@ +from profiler.advisor.interface.interface import Interface \ No newline at end of file diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 0c6a2ac260..4041aa0483 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -6,4 +6,4 @@ class SupportedScopes: 
GRAPH = "graph" SLOW_RANK = "slow_rank" SLOW_LINK = "slow_link" - PORFILING_OPERATOR_ANALYSIS = "profiling_operator_analysis" + PROFILING_OPERATOR_ANALYSIS = "profiling_operator_analysis" diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 4ac062ce6c..1ee6e62af5 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -1,5 +1,8 @@ import os from collections import OrderedDict +import sys +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), "cluster_analyse")) +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))), "compare_tools")) from profiler.advisor.utils.utils import Timer from profiler.advisor.analyzer.computation.profiling_analyzer import ProfilingAnalyzer @@ -16,7 +19,7 @@ class Interface: SupportedScopes.TIMELINE_FUSION_OPS: TimelineFusionOpsAnalyzer }), "computation": OrderedDict({ - SupportedScopes.PORFILING_OPERATOR_ANALYSIS: ProfilingAnalyzer, + SupportedScopes.PROFILING_OPERATOR_ANALYSIS: ProfilingAnalyzer, SupportedScopes.GRAPH: FusionOPAnalyzer }), "communication": OrderedDict(), @@ -59,7 +62,8 @@ class Interface: if hasattr(analyzer, "html_render"): analyzer.html_render.render_html() analyzer.html_render.save_to_file(f'att_advisor_{Timer().strftime}.html') - return result if not output_dict else result.data.get(getattr(SupportedScopes, scope.upper())) + + return result if not output_dict else dict(result.data) if __name__ == "__main__": diff --git a/profiler/advisor/result/result.py b/profiler/advisor/result/result.py index 30b8f5795c..06a515e783 100644 --- a/profiler/advisor/result/result.py +++ b/profiler/advisor/result/result.py @@ -90,7 +90,8 @@ class SheetRecoder: if not isinstance(self._sheet_data[sheet_name].get("data"), list): self._sheet_data[sheet_name]["data"] = [] - self._sheet_data[sheet_name]["data"].append(data) + if data not in 
self._sheet_data[sheet_name]["data"]: + self._sheet_data[sheet_name]["data"].append(data) @singleton diff --git a/profiler/cli/complete_cli.py b/profiler/cli/complete_cli.py index e4fa0caf3f..28f00c5866 100644 --- a/profiler/cli/complete_cli.py +++ b/profiler/cli/complete_cli.py @@ -18,11 +18,11 @@ def auto_complete_cli(shell_type): """ click.echo("Tips: please paste following shell command to your terminal to activate auto completion.\n") if shell_type.lower() == "bash": - bash_str = 'eval "$(_advisor_COMPLETE=bash_source ma-advisor)"' + bash_str = 'eval "$(_advisor_COMPLETE=bash_source msprof-analyze)"' elif shell_type.lower() == "zsh": - bash_str = 'eval "$(_advisor_COMPLETE=zsh_source ma-advisor)"' + bash_str = 'eval "$(_advisor_COMPLETE=zsh_source msprof-analyze)"' elif shell_type.lower() == "fish": - bash_str = 'eval (env _advisor_COMPLETE=fish_source ma-advisor)' + bash_str = 'eval (env _advisor_COMPLETE=fish_source msprof-analyze)' else: click.echo(f'Unsupported shell type {shell_type}.') return -- Gitee From 568f3bb5fae2a23cb0ce4202d40b6e9ef5ab8853 Mon Sep 17 00:00:00 2001 From: PersonalC Date: Wed, 22 May 2024 17:29:26 +0800 Subject: [PATCH 20/21] bugfix: auto-completion --- profiler/cli/complete_cli.py | 8 ++++---- profiler/requirements/build.txt | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/profiler/cli/complete_cli.py b/profiler/cli/complete_cli.py index 28f00c5866..ebf2cbf30b 100644 --- a/profiler/cli/complete_cli.py +++ b/profiler/cli/complete_cli.py @@ -14,15 +14,15 @@ def auto_complete_cli(shell_type): \b # print bash auto complete command to terminal - ma-advisor auto-completion Bash + msprof-analyze auto-completion Bash """ click.echo("Tips: please paste following shell command to your terminal to activate auto completion.\n") if shell_type.lower() == "bash": - bash_str = 'eval "$(_advisor_COMPLETE=bash_source msprof-analyze)"' + bash_str = 'eval "$(_MSPROF_ANALYZE_COMPLETE=bash_source msprof-analyze)"' elif 
shell_type.lower() == "zsh": - bash_str = 'eval "$(_advisor_COMPLETE=zsh_source msprof-analyze)"' + bash_str = 'eval "$(_MSPROF_ANALYZE_COMPLETE=zsh_source msprof-analyze)"' elif shell_type.lower() == "fish": - bash_str = 'eval (env _advisor_COMPLETE=fish_source msprof-analyze)' + bash_str = 'eval (env _MSPROF_ANALYZE_COMPLETE=fish_source msprof-analyze)' else: click.echo(f'Unsupported shell type {shell_type}.') return diff --git a/profiler/requirements/build.txt b/profiler/requirements/build.txt index c750ff83de..d184b170c3 100644 --- a/profiler/requirements/build.txt +++ b/profiler/requirements/build.txt @@ -9,4 +9,5 @@ ijson requests xlsxwriter sqlalchemy -urllib3<2.0 \ No newline at end of file +urllib3<2.0 +numpy \ No newline at end of file -- Gitee From 5f096ddc5dfef23a82656a65a13ff5af27a0a116 Mon Sep 17 00:00:00 2001 From: fanxiaotong Date: Wed, 22 May 2024 18:53:16 +0800 Subject: [PATCH 21/21] bugfix --- .../analyzer/cluster/slow_link_analyser.py | 15 +++++-- .../analyzer/cluster/slow_rank_analyser.py | 13 ++++-- .../overall/overall_summary_analyzer.py | 40 ++++++++++++------- profiler/advisor/common/analyzer_scopes.py | 1 + .../dataset/cluster/cluster_dataset.py | 14 ++++++- profiler/advisor/interface/interface.py | 2 +- 6 files changed, 62 insertions(+), 23 deletions(-) diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyser.py b/profiler/advisor/analyzer/cluster/slow_link_analyser.py index e9143ae1de..52da3965f6 100644 --- a/profiler/advisor/analyzer/cluster/slow_link_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_link_analyser.py @@ -46,9 +46,15 @@ class SlowLinkAnalyzer(BaseAnalyzer): self.result = OptimizeResult() self.bottelneck = '' self.suggestion = '' + self.format_datas = [] def optimize(self, **kwargs): + if self.rank_bw_dict is None: + print("slow_link 分析失败,原因是数据加载失败,请检查你的cluster_analysis_outpu文件夹, \ + 如不关心这类数据请忽略") + return self.result self.process() + self.format_datas = self.format_details() self.make_record() 
self.make_render() return self.result @@ -74,12 +80,12 @@ class SlowLinkAnalyzer(BaseAnalyzer): headers = ['rank_id'] + list(self.rank_bw_dict[0].keys()) data_list = [] for rank_id, rank_bw in self.rank_bw_dict.items(): - data_list.append([rank_id] + list(rank_bw.keys())) + data_list.append([rank_id] + list(rank_bw.values())) details_dict["headers"] = headers details_dict["data"] = data_list - return [details_dict] + return details_dict def make_record(self): """ @@ -92,11 +98,14 @@ class SlowLinkAnalyzer(BaseAnalyzer): ) self.result.add(OptimizeRecord(optimization_item)) + for i, data in enumerate(self.format_datas["data"]): + self.result.add_detail(SlowLinkAnalyzer.SLOW_LINK_ANALYSIS, self.format_datas["headers"], data) + def make_render(self): result_for_html = { "Description" : self.bottelneck, "suggestion" : self.suggestion, - "details" : self.format_details() + "details" : [self.format_datas] } self.html_render.render_template(key="cluster", diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py index b49ef5ec8c..6b1400485e 100644 --- a/profiler/advisor/analyzer/cluster/slow_rank_analyser.py +++ b/profiler/advisor/analyzer/cluster/slow_rank_analyser.py @@ -38,10 +38,15 @@ class SlowRankAnalyzer(BaseAnalyzer): self.result = OptimizeResult() self.bottelneck = '' self.suggestion = '' + self.format_datas = [] def optimize(self, **kwargs): + if self.step_trace_dict is None: + print("slow_rank 分析失败,原因是数据加载失败,请检查你的cluster_analysis_outpu文件夹 \ + 如不关心这类数据请忽略") + return self.result self.process() - + self.format_datas = self.format_details() self.make_record() self.make_render() return self.result @@ -71,6 +76,8 @@ class SlowRankAnalyzer(BaseAnalyzer): [""] ) self.result.add(OptimizeRecord(optimization_item)) + for i, data in enumerate(self.format_datas["data"]): + self.result.add_detail(SlowRankAnalyzer.SLOW_RANK_ANALYSIS, self.format_datas["headers"], data) def format_details(self): details_dict = 
{} @@ -80,13 +87,13 @@ class SlowRankAnalyzer(BaseAnalyzer): data_list.append([key] + value) details_dict["headers"] = headers details_dict["data"] = data_list - return [details_dict] + return details_dict def make_render(self): result_for_html = { "Description" : self.bottelneck, "suggestion" : self.suggestion, - "details" : self.format_details() + "details" : [self.format_datas] } self.html_render.render_template(key="cluster", diff --git a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py index f46bb2c1d5..b7c320b7aa 100644 --- a/profiler/advisor/analyzer/overall/overall_summary_analyzer.py +++ b/profiler/advisor/analyzer/overall/overall_summary_analyzer.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +import copy import logging from typing import Dict, List @@ -24,14 +25,15 @@ from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer from profiler.compare_tools.compare_backend.utils.constant import Constant from profiler.advisor.common import constant as const from profiler.compare_tools.compare_interface.comparison_interface import ComparisonInterface +from profiler.advisor.utils.utils import get_file_path_from_directory, load_parameter class OverallSummaryAnalyzer(BaseAnalyzer): OVERALL_SUMMARY_ANALYZER = "overall_summary_analysis" advice_map = { - "Computing Time": "if you want more detailed advice please go to compute_perf_analysis.ipynb.", - "Uncovered Communication Time": "if you want more detailed advice please go to cluster_perf_analysis.ipynb.", - "Free Time": "if you want more detailed advice please go to timeline_perf_analysis.ipynb." 
+ "Computing Time": "if you want more detailed advice please go to att_advisor_*.html", + "Uncovered Communication Time": "if you want more detailed advice please go to att_advisor_*.html", + "Free Time": "if you want more detailed advice please go to att_advisor_*.html" } time_name_map = { "Computing Time": "computing", @@ -53,7 +55,8 @@ class OverallSummaryAnalyzer(BaseAnalyzer): def __init__(self, collection_path: str, n_processes: int = 1, cann_version=const.DEFAULT_CANN_VERSION, torch_version=const.DEFAULT_TORCH_VERSION, **kwargs): - super().__init__(collection_path, n_processes, cann_version, torch_version, **kwargs) + profile_path = get_profile_path(collection_path) + super().__init__(profile_path, n_processes, cann_version, torch_version, **kwargs) self.base_collection_path = kwargs.get("base_collection_path", "") self._has_base_collection = False self._is_minimal_profiling = False @@ -204,14 +207,16 @@ class OverallSummaryAnalyzer(BaseAnalyzer): def format_cur_data(self): if not self.cur_data: return - data_table = {} for data_type, data in self.cur_data.items(): - if data: - headers = [key for key in data] - data_list = [data[key] for key in data] - data_table["headers"] = headers - data_table["data"] = [data_list] - self.cur_data_table[data_type] = data_table + if not data: + continue + if data_type not in list(self.time_name_map.values()): + data_list = list(data.values()) + else: + data_list = [','.join(map(str, value)) for value in data.values()] + headers = list(data.keys()) + data_table = {"headers": headers, "data": [data_list]} + self.cur_data_table[data_type] = copy.deepcopy(data_table) def make_record(self): @@ -226,9 +231,9 @@ class OverallSummaryAnalyzer(BaseAnalyzer): self.result.add(OptimizeRecord(optimization_item)) self.result.add_detail(const.BOTTLENECK, self.bottleneck_table["headers"], self.bottleneck_table["data"][0]) - for data_type, data in self.cur_data.items(): - if data: - self.result.add_detail(const.DATA + data_type, 
self.cur_data_table[data_type]["headers"], self.cur_data_table[data_type]["data"][0]) + for data_type, data_dict in self.cur_data_table.items(): + if data_dict: + self.result.add_detail(const.DATA + data_type, data_dict["headers"], data_dict["data"][0]) def make_render(self): result_for_html = { @@ -244,3 +249,10 @@ class OverallSummaryAnalyzer(BaseAnalyzer): cann_version=self.cann_version, torch_version=self.torch_version, result=result_for_html) + +def get_profile_path(collection_path): + for root, dirs, files in os.walk(collection_path): + for file in files: + if file.startswith("profiler_info"): + return root + return None \ No newline at end of file diff --git a/profiler/advisor/common/analyzer_scopes.py b/profiler/advisor/common/analyzer_scopes.py index 4041aa0483..44f09d0a58 100644 --- a/profiler/advisor/common/analyzer_scopes.py +++ b/profiler/advisor/common/analyzer_scopes.py @@ -6,4 +6,5 @@ class SupportedScopes: GRAPH = "graph" SLOW_RANK = "slow_rank" SLOW_LINK = "slow_link" + OVER_ALL = "over_all" PROFILING_OPERATOR_ANALYSIS = "profiling_operator_analysis" diff --git a/profiler/advisor/dataset/cluster/cluster_dataset.py b/profiler/advisor/dataset/cluster/cluster_dataset.py index b8daedab08..94527cdf5b 100644 --- a/profiler/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/advisor/dataset/cluster/cluster_dataset.py @@ -71,7 +71,12 @@ class ClusterStepTraceTimeDataSet(ClusterDataset): def _parse(self): self.cluster_analyze() - step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) + try: + step_data = self.load_csv_data(const.CLUSTER_STEP_TIME_CSV, ClusterStepTraceTimeBean) + except RuntimeError as e: + print("捕获到异常:", e) + self._step_dict = None + return False self._step_dict = self.formate_data(step_data) return True @@ -120,7 +125,12 @@ class ClusterCommunicationDataSet(ClusterDataset): def _parse(self): self.cluster_analyze() - communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) + try: + 
communication_json = self.load_json_data(const.CLUSTER_COMM_JSON) + except RuntimeError as e: + print("捕获到异常:", e) + self.rank_bw_dict = None + return False self.process(communication_json) return True diff --git a/profiler/advisor/interface/interface.py b/profiler/advisor/interface/interface.py index 1ee6e62af5..c0d04db8eb 100644 --- a/profiler/advisor/interface/interface.py +++ b/profiler/advisor/interface/interface.py @@ -23,7 +23,7 @@ class Interface: SupportedScopes.GRAPH: FusionOPAnalyzer }), "communication": OrderedDict(), - "overall": OrderedDict(), + "overall": OrderedDict({SupportedScopes.OVER_ALL: OverallSummaryAnalyzer}), "dataloader": OrderedDict(), "cluster": OrderedDict({ SupportedScopes.SLOW_RANK: SlowRankAnalyzer, -- Gitee