From 158522d5151455db2112d82d66942c09da83dcf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=A3=98=E5=87=AF=E8=BE=BE?= Date: Wed, 6 Dec 2023 12:03:27 +0800 Subject: [PATCH] Add Timeline Advice. --- .../advice_factory/timeline_advice_factory.py | 41 +++++++++ .../common_func_advisor/constant.py | 3 + profiler/advisor/advisor_backend/interface.py | 4 +- .../timeline_advice/__init__.py | 14 +++ .../timeline_advice/optimizer_advice.py | 55 +++++++++++ .../timeline_advice/timeline_advice_base.py | 86 +++++++++++++++++ profiler/advisor/timeline_perf_analysis.ipynb | 92 +++++++++++++++++++ 7 files changed, 294 insertions(+), 1 deletion(-) create mode 100644 profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py create mode 100644 profiler/advisor/advisor_backend/timeline_advice/__init__.py create mode 100644 profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py create mode 100644 profiler/advisor/advisor_backend/timeline_advice/timeline_advice_base.py create mode 100644 profiler/advisor/timeline_perf_analysis.ipynb diff --git a/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py b/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py new file mode 100644 index 0000000000..add28332f6 --- /dev/null +++ b/profiler/advisor/advisor_backend/advice_factory/timeline_advice_factory.py @@ -0,0 +1,41 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class TimelineAdviceFactory(AdviceFactory):
    """
    Factory that resolves a timeline advice name to its analyser class and runs it.
    """
    # advice name -> analyser class; produce_advice() constructs the class with
    # the collection path and calls its run()
    ADVICE_LIB = {Constant.OPTIM: OptimizerAdvice}

    def __init__(self, collection_path: str):
        super().__init__(collection_path)

    def path_check(self):
        """
        Validate the collection path with PathManager.check_input_directory_path.
        """
        # NOTE(review): the timeline analysers' own path_check also accepts a
        # single trace_view.json file; confirm a directory-only check is intended.
        PathManager.check_input_directory_path(self.collection_path)

    def produce_advice(self, advice: str):
        """
        Run the analyser registered for `advice` and return the result of its run().

        The advice name is first passed to self.advice_check().
        """
        self.advice_check(advice)
        advice_cls = self.ADVICE_LIB.get(advice)
        analyser = advice_cls(self.collection_path)
        return analyser.run()
advisor_backend.advice_factory.timeline_advice_factory import TimelineAdviceFactory class Interface: @@ -38,7 +39,8 @@ class Interface: class FactoryController: FACTORY_LIB = { - Constant.CLUSTER: ClusterAdviceFactory + Constant.CLUSTER: ClusterAdviceFactory, + Constant.TIMELINE: TimelineAdviceFactory } def __init__(self, collection_path: str): diff --git a/profiler/advisor/advisor_backend/timeline_advice/__init__.py b/profiler/advisor/advisor_backend/timeline_advice/__init__.py new file mode 100644 index 0000000000..8400fd5ecd --- /dev/null +++ b/profiler/advisor/advisor_backend/timeline_advice/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py b/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py new file mode 100644 index 0000000000..ca24bb5ec6 --- /dev/null +++ b/profiler/advisor/advisor_backend/timeline_advice/optimizer_advice.py @@ -0,0 +1,55 @@ +# Copyright (c) 2023, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class OptimizerAdvice(TimelineAdviceBase):
    """
    Recommend NPU fused optimizers for the optimizers found in a timeline.

    After run(), the output data holds the distinct optimizer event names seen
    in the trace, and the advice/bottleneck text holds one recommendation line
    for each of those names that has an entry in OPTIMIZER_MAP.
    """
    # optimizer step event name in the trace -> suggested fused replacement
    OPTIMIZER_MAP = {
        "Optimizer.step#SGD.step": "torch_npu.optim.NpuFusedSGD",
        "Optimizer.step#Adadelta.step": "torch_npu.optim.NpuFusedAdadelta",
        "Optimizer.step#Lamb.step": "torch_npu.optim.NpuFusedLamb",
        "Optimizer.step#Adam.step": "torch_npu.optim.NpuFusedAdam",
        "Optimizer.step#AdamW.step": "torch_npu.optim.NpuFusedAdamW",
        "Optimizer.step#AdamP.step": "torch_npu.optim.NpuFusedAdamP",
        "Optimizer.step#BertAdam.step": "torch_npu.optim.NpuFusedBertAdam",
        "Optimizer.step#RMSprop.step": "torch_npu.optim.NpuFusedRMSprop",
        "Optimizer.step#RMSpropTF.step": "torch_npu.optim.NpuFusedRMSpropTF",
    }

    def __init__(self, collection_path: str):
        super().__init__(collection_path)
        self.cur_data = []
        self.cur_bottleneck = ""
        self.cur_advice = ""

    def run(self):
        """
        Check the path, parse the trace, build the advice and return the output data.

        When path_check() fails the output data is returned without being filled in.
        """
        if not self.path_check():
            return self.output_format_data
        self.preparse()
        self.process()
        self.output()
        return self.output_format_data

    def process(self):
        """
        Fill cur_data, cur_advice and cur_bottleneck from the pre-parsed optimizer events.

        Does nothing when no optimizer event was collected.
        """
        optimizer_events = self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER]
        if not optimizer_events:
            return

        names = {entry.get("name") for entry in optimizer_events}
        names.discard(None)
        # sorted so the reported data and advice have a stable order between runs
        self.cur_data = sorted(names)
        # Only optimizers with a known fused replacement get an advice line; any
        # other "Optimizer.step#*.step" event (for example an optimizer that is
        # already fused) is reported in cur_data but must not raise KeyError.
        # Assigning instead of appending keeps a repeated call from duplicating text.
        self.cur_advice = "\n".join(
            f"You can choose {self.OPTIMIZER_MAP[name]} to replace the current Optimizer: {name}."
            for name in self.cur_data
            if name in self.OPTIMIZER_MAP
        )
        self.cur_bottleneck = self.cur_advice
class TimelineAdviceBase(AdviceBase):
    """
    Base class for advice derived from a profiler trace_view.json timeline.

    It resolves the trace file location, pre-parses the trace once into
    preparse_data (keyed by PREPARSE_TYPE) and copies the subclass results
    into output_format_data.
    """

    class PREPARSE_TYPE:
        # keys of preparse_data; only OPTIMIZER is filled by preparse() here
        OPTIMIZER = 0
        STEP = 1
        OVERLAP_CPT = 2
        OVERLAP_FREE = 3
        OVERLAP_CMU = 4
        ENQUEUE = 5
        DEQUEUE = 6
        HOST_TO_DEVICE = 7

    def __init__(self, collection_path: str):
        super().__init__(collection_path)
        self.trace_view_path = ""
        self.has_preparse = False
        self.preparse_data = defaultdict(list)
        # results read by output(); subclasses overwrite them while processing
        self.cur_data = []
        self.cur_bottleneck = ""
        self.cur_advice = ""

    def path_check(self):
        """
        check whether input path is valid

        Accepts either a directory whose path ends with "ascend_pt" (the trace is
        then expected at ASCEND_PROFILER_OUTPUT/trace_view.json inside it) or a
        file named trace_view.json. Sets self.trace_view_path on success.

        Returns:
            True when a trace file was located, False otherwise (an error is printed).
        """
        if not os.path.exists(self.collection_path):
            print("[ERROR] Path: {} is not exist.".format(self.collection_path))
            return False
        # normpath drops a trailing separator so "xxx_ascend_pt/" is accepted too
        is_profiler_dir = os.path.isdir(self.collection_path) and \
            os.path.normpath(self.collection_path).endswith("ascend_pt")
        if is_profiler_dir:
            output_dir = os.path.join(self.collection_path, "ASCEND_PROFILER_OUTPUT")
            self.trace_view_path = os.path.join(output_dir, "trace_view.json")
            if not os.path.exists(self.trace_view_path):
                print("[ERROR] trace_view.json is not exist in the Path: {}.".format(output_dir))
                return False
        elif os.path.isfile(self.collection_path) and os.path.basename(self.collection_path) == "trace_view.json":
            self.trace_view_path = self.collection_path
        else:
            print("[ERROR] Please input ascend_pt or trace_view.json.")
            return False
        print("[INFO] Start to analyse the target file: {}".format(self.trace_view_path))
        return True

    @abstractmethod
    def run(self):
        """
        analyze profiling data and advice
        """

    def output(self):
        """
        output relevant data

        Copies cur_data, cur_bottleneck and cur_advice into output_format_data.
        This is a concrete implementation shared by subclasses, so it is not
        declared abstract.
        """
        self.output_format_data[self.DATA] = self.cur_data
        self.output_format_data[self.BOTTLENECK] = self.cur_bottleneck
        self.output_format_data[self.ADVICE] = self.cur_advice

    def preparse(self):
        """
        Load the trace file once and collect the optimizer step events.

        Events whose name starts with "Optimizer.step#" and ends with ".step" are
        appended to preparse_data[PREPARSE_TYPE.OPTIMIZER]. Later calls return
        immediately. Errors from opening or decoding the file propagate.
        """
        if self.has_preparse:
            return
        with open(self.trace_view_path, 'r', encoding="utf-8") as f:
            json_reader = json.load(f)
        # the trace may be a bare event list or an object wrapping it in "traceEvents"
        if isinstance(json_reader, dict):
            json_reader = json_reader.get("traceEvents", [])
        for entry in json_reader:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name", None)
            if isinstance(name, str) and name.startswith("Optimizer.step#") and name.endswith(".step"):
                self.preparse_data[self.PREPARSE_TYPE.OPTIMIZER].append(entry)

        # set the flag; assigning to self.preparse would shadow this method
        self.has_preparse = True
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- Gitee