diff --git a/profiler/merge_profiling_timeline/README.md b/profiler/merge_profiling_timeline/README.md index 5a899872a708786cdd7142d210b998c71e715ffb..2d29079570c834e0f4a1929b96a8bcd03949be70 100644 --- a/profiler/merge_profiling_timeline/README.md +++ b/profiler/merge_profiling_timeline/README.md @@ -4,12 +4,12 @@ 本工具支持合并profiling的timeline数据,支持合并指定rank的timline、合并指定timeline中的item -## 1 多timeline融合(常规) +## 1 多timeline融合 ### 1.1 数据采集 使用msporf采集数据,将采集到的所有节点的profiling数据拷贝到当前机器同一目录下,以下假设数据在/home/test/cann_profiling下 -profiling数据目录结构示意, 合并timeline必需数据:`msprof.json`和`info.json.*`: +e2e profiling数据目录结构如下, 合并timeline必需数据:`msprof*.json`和`info.json.*`: ``` |- cann_profiling |- PROF_*** @@ -21,15 +21,30 @@ profiling数据目录结构示意, 合并timeline必需数据:`msprof.json`和 |- PROF_*** ... ``` +ascend pytorch profiling数据目录结构如下 +``` +|- ascend_pytorch_profiling + |- **_ascend_pt + |- ASCEND_PROFILER_OUTPUT + |- trace_view.json + |- FRAMEWORK + |- PROF_*** + |- **_ascend_pt +``` + ### 1.2 合并timeline 可选参数: - -d: **必选参数**,profiling数据文件或文件夹路径 -- -t: **当需要融合多级多卡timeline时需要校准多机间的时间**,传入时间校准的time_difference.json文件路径, 该文件的获取参考[节点间时间差获取](https://gitee.com/aerfaliang/merge_profiling_timeline/tree/master/get_nodes_timediff) +- -t: **当需要融合多机多卡timeline时需要校准多机间的时间**,传入时间校准的time_difference.json文件路径, 该文件的获取参考[节点间时间差获取](https://gitee.com/aerfaliang/merge_profiling_timeline/tree/master/get_nodes_timediff) - -o: 可选参数,指定合并后的timeline文件输出的路径,默认为'-d'输入的路径 - --rank:可选参数,指定需要合并timeline的卡号,默认全部合并 - --items:可选参数,指定需要合并的profiling数据项,默认全部合并 +- --type: 指定需要合并timeline场景,可选参数:`pytorch`, `e2e`, `custom` + - `pytorch`:通过ascend pytorch方式采集profiling数据,合并所有卡的trace_view.json + - `e2e`:通过e2e方式采集profiling数据,优先合并总timeline,没有生成则选择合并device目录下的msprof_*.json + - `custom` :自定义需要合并的timeline数据,具体参考第2章节 @@ -64,7 +79,7 @@ python3 main.py -d path/to/cann_profiling/ -t path/to/time_difference.json --ran ## 2 多timeline融合(自定义) ### 2.1 数据采集 将需要合并的timeline文件全部放在同一目录下 -数据目录结构示意, 合并timeline必需数据:`msprof.json`和`info.json.*`: +数据目录结构示意如下: ``` |- timeline |- 
msprof_0.json @@ -83,12 +98,13 @@ python3 main.py -d path/to/cann_profiling/ -t path/to/time_difference.json --ran 可选参数: - -d: **必选参数**,指定profiling数据文件或文件夹路径 - -o: 可选参数,指定合并后的timeline文件输出的路径,默认为'-d'输入的路径 -- --custom: **必选参数**,工具通过该参数识别为自定义融合场景 +- --type: 指定需要合并timeline场景,指定参数:`custom` + - `custom` :自定义需要合并的timeline数据 **使用示例**: 将需要合并的所有timeline放在同一目录下,通过下面的命令合并所有timeline ``` -python3 main.py -d path/to/timeline/ --custom +python3 main.py -d path/to/timeline/ --type custom ``` 合并timeline查看:同 @@ -102,5 +118,3 @@ python ./trace_processor --httpd path/to/msprof_merged_*p.json 等待加载完毕,刷新[perfetto](https://ui.perfetto.dev/)界面,点击`YES, use loaded trace`即可展示timeline - - diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index dfa90eb05d265ffc796833452a00bb4c0d8e4748..84a0f7d71bf7b15f7d0fdac37b3a3d7fa73d2515 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -62,11 +62,11 @@ def natural_sort(files): return sorted(files, key=alphanum_key) -def get_timeline_info(input_path, prof_dirs): +def get_timeline_info(args, prof_dirs): timeline_info = {} for prof in prof_dirs: - pro_path = os.path.join(input_path, prof) + pro_path = os.path.join(args.data, prof) # 从info.json读取rank_id rank_id = get_rank_id_from_info_json(pro_path) @@ -74,14 +74,29 @@ def get_timeline_info(input_path, prof_dirs): print(f"WARN, There is not rank id info in {pro_path}") continue - tmp_path = os.path.realpath(os.path.join(pro_path, "timeline", "msprof.json")) - if os.path.exists(tmp_path): - timeline_info[rank_id] = tmp_path + timeline_path = get_timeline_path(pro_path, args.type) + + if os.path.exists(timeline_path): + timeline_info[rank_id] = timeline_path else: - print(f"WARN, The file \"{tmp_path}\" does not exist.") + print(f"WARN, The file \"{timeline_path}\" does not exist.") return timeline_info +def get_timeline_path(pro_path, type): + for root, dirs, files in os.walk(pro_path): + for dir_ in dirs: + 
if 'ASCEND_PROFILER_OUTPUT' == dir_ and type == 'pytorch': + timeline_path = os.path.realpath(os.path.join(root, dir_, 'trace_view.json')) + return timeline_path + + for file_ in sorted(files, reverse=True): + if 'msprof' in file_: + timeline_path = os.path.join(root, file_) + return timeline_path + return + + def get_rank_id_from_info_json(pro_path): info_json = "" rank_id = None @@ -98,8 +113,10 @@ def get_rank_id_from_info_json(pro_path): return rank_id -def merge_timeline_general(timeline_info, args): - +def merge_timeline_general(args): + """合并e2e profiling生成的msprof*.json""" + prof_dir = get_path_dir(args.data) + timeline_info = get_timeline_info(args, prof_dir) timeline_files_dict = {} node_time_diff = get_node_time_diff(args.timediff) if args.timediff else None @@ -178,9 +195,8 @@ def merge_timeline_events(timeline_file_dict, process_list, node_time_diff=None) # 区分不同rank的同一进程的pid if isinstance(event.get("pid"), (str, int)): # 合并GPU profiling/ after timeline pid modify - # event["pid"] = int(event["pid"]) if event["pid"].isdigit() else event["pid"] + str(rank_id) event["pid"] = int(''.join(x for x in str(event.get("pid")) if x.isdigit()) + - str(rank_id * MAX_INDEX_COUNT)) + str(rank_id)) # convert tid to int if isinstance(event.get("tid"), str): @@ -206,20 +222,18 @@ def parse_args(): parser.add_argument("--output", "-o", default=None, help="save path of msprof_merged.json ") parser.add_argument("--rank", default=None, help="List of ranks to be merged. By default, all ranks are merged") parser.add_argument("--items", default=None, help="Specify the data items to be merged. 
in the timeline.") - parser.add_argument("--custom", action="store_true", help="Customize the timeline file to be merged.") + parser.add_argument("--type", choices=('pytorch', 'e2e', 'custom'), help="Customize the timeline file to be merged.") arg = parser.parse_args() return arg if __name__ == "__main__": args = parse_args() - prof_dir = get_path_dir(args.data) - timeline_info = get_timeline_info(args.data, prof_dir) if not args.output: args.output = args.data print("========================== start merge timeline ====================") - if args.custom: + if args.type == "custom": merge_timeline_custom(args) else: - merge_timeline_general(timeline_info, args) \ No newline at end of file + merge_timeline_general(args) \ No newline at end of file diff --git a/profiler/merge_profiling_timeline/trace_processor b/profiler/merge_profiling_timeline/trace_processor new file mode 100644 index 0000000000000000000000000000000000000000..201d8a8d8f3e24454c0aac3155f7c945c49424b9 --- /dev/null +++ b/profiler/merge_profiling_timeline/trace_processor @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +# Copyright (C) 2021 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +# DO NOT EDIT. Auto-generated by tools/gen_amalgamated_python_tools +# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ +# This file should do the same thing when being invoked in any of these ways: +# ./trace_processor +# python trace_processor +# bash trace_processor +# cat ./trace_processor | bash +# cat ./trace_processor | python - + +BASH_FALLBACK=""" " +exec python3 - "$@" <<'#'EOF +#""" # yapf: disable + + +# ----- Amalgamator: begin of python/perfetto/prebuilts/manifests/trace_processor_shell.py +# This file has been generated by: /Users/hjd/src/perfetto/tools/roll-prebuilts v34.0 +TRACE_PROCESSOR_SHELL_MANIFEST = [{ + 'arch': + 'mac-amd64', + 'file_name': + 'trace_processor_shell', + 'file_size': + 8714576, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/mac-amd64/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/mac-amd64/trace_processor_shell.exe", + 'sha256': + '9bdb89493f0f00db5d3a73166450ac2f6ee830de16415e79c5a0234990caa644', + 'platform': + 'darwin', + 'machine': ['x86_64'] +}, { + 'arch': + 'mac-arm64', + 'file_name': + 'trace_processor_shell', + 'file_size': + 7286968, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/mac-arm64/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/mac-arm64/trace_processor_shell.exe", + 'sha256': + '948536035fbe680b47b94a99d320ff459450738e4aeeb16cef18364f0023622b', + 'platform': + 'darwin', + 'machine': ['arm64'] +}, { + 'arch': + 'linux-amd64', + 'file_name': + 'trace_processor_shell', + 'file_size': + 8576688, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/linux-amd64/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/linux-amd64/trace_processor_shell.exe", + 'sha256': + '493698c81fffcabc340c72831b175962dba5a31dfe8572a6d5af083a116af4f8', + 'platform': + 'linux', + 'machine': ['x86_64'] +}, { + 'arch': + 'linux-arm', + 'file_name': + 'trace_processor_shell', + 'file_size': + 6125384, + 
'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/linux-arm/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/linux-arm/trace_processor_shell.exe", + + 'sha256': + '53f1e27603695cf92d22519993b6eafa9c60957d9cb33bd0b300df8573b87ebb', + 'platform': + 'linux', + 'machine': ['armv6l', 'armv7l', 'armv8l'] +}, { + 'arch': + 'linux-arm64', + 'file_name': + 'trace_processor_shell', + 'file_size': + 8036288, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/linux-arm64/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/linux-arm64/trace_processor_shell.exe", + + 'sha256': + '2a2cda222c9d5e18b638057688babb00a3a975ccd4b7dd65f26211c2cb7767f9', + 'platform': + 'linux', + 'machine': ['aarch64'] +}, { + 'arch': + 'android-arm', + 'file_name': + 'trace_processor_shell', + 'file_size': + 5813384, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/android-arm/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/android-arm/trace_processor_shell.exe", + + 'sha256': + 'f3ec4c194d0b06af5b296c1c479e6b29090e6b7cc7e58fbd55ca2919a126f0ee' +}, { + 'arch': + 'android-arm64', + 'file_name': + 'trace_processor_shell', + 'file_size': + 7294768, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/android-arm64/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/android-arm64/trace_processor_shell.exe", + 'sha256': + 'f44f47d4b873ec68b6fa4f4c69a3e5a13d58b4d9cb2ec591fa687d4480c1950b' +}, { + 'arch': + 'android-x86', + 'file_name': + 'trace_processor_shell', + 'file_size': + 8090716, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/android-x86/trace_processor_shell', + 
"http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/android-x86/trace_processor_shell.exe", + 'sha256': + '5636d8251747376787640bc3a4894ecf3091e4bf3d38b007003e1992fc5792df' +}, { + 'arch': + 'android-x64', + 'file_name': + 'trace_processor_shell', + 'file_size': + 8359784, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/android-x64/trace_processor_shell', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/android-x64/trace_processor_shell.exe", + 'sha256': + '50440fa055ab998f6cf24f9a9a7388520cc854708735521505e10291bc52f3d0' +}, { + 'arch': + 'windows-amd64', + 'file_name': + 'trace_processor_shell.exe', + 'file_size': + 8130560, + 'url': + #'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v34.0/windows-amd64/trace_processor_shell.exe', + "http://aisbench.obs.cn-north-4.myhuaweicloud.com/packet/perfetto/v34.0/windows-amd64/trace_processor_shell.exe", + 'sha256': + '5cbcf98e29a2d989523235e11e4e0dade692a295ebf47a6c93a09a050ce9bc91', + 'platform': + 'win32', + 'machine': ['amd64'] +}] + +# ----- Amalgamator: end of python/perfetto/prebuilts/manifests/trace_processor_shell.py + +# ----- Amalgamator: begin of python/perfetto/prebuilts/perfetto_prebuilts.py +# Copyright (C) 2021 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Functions to fetch pre-pinned Perfetto prebuilts. 
+ +This function is used in different places: +- Into the //tools/{trace_processor, traceconv} scripts, which are just plain + wrappers around executables. +- Into the //tools/{heap_profiler, record_android_trace} scripts, which contain + some other hand-written python code. + +The manifest argument looks as follows: +TRACECONV_MANIFEST = [ + { + 'arch': 'mac-amd64', + 'file_name': 'traceconv', + 'file_size': 7087080, + 'url': 'https://commondatastorage.googleapis.com/.../trace_to_text', + 'sha256': '7d957c005b0dc130f5bd855d6cec27e060d38841b320d04840afc569f9087490', + 'platform': 'darwin', + 'machine': 'x86_64' + }, + ... +] + +The intended usage is: + + from perfetto.prebuilts.manifests.traceconv import TRACECONV_MANIFEST + bin_path = get_perfetto_prebuilt(TRACECONV_MANIFEST) + subprocess.call(bin_path, ...) +""" + +import hashlib +import os +import platform +import subprocess +import sys + + +def download_or_get_cached(file_name, url, sha256): + """ Downloads a prebuilt or returns a cached version + + The first time this is invoked, it downloads the |url| and caches it into + ~/.local/share/perfetto/prebuilts/$tool_name. On subsequent invocations it + just runs the cached version. + """ + dir = os.path.join( + os.path.expanduser('~'), '.local', 'share', 'perfetto', 'prebuilts') + os.makedirs(dir, exist_ok=True) + bin_path = os.path.join(dir, file_name) + sha256_path = os.path.join(dir, file_name + '.sha256') + needs_download = True + + # Avoid recomputing the SHA-256 on each invocation. The SHA-256 of the last + # download is cached into file_name.sha256, just check if that matches. + if os.path.exists(bin_path) and os.path.exists(sha256_path): + with open(sha256_path, 'rb') as f: + digest = f.read().decode() + if digest == sha256: + needs_download = False + + if needs_download: + # Either the file doesn't exist or the SHA256 doesn't match.
+ tmp_path = bin_path + '.tmp' + print('Downloading ' + url) + subprocess.check_call(['curl', '-f', '-L', '-#', '-o', tmp_path, url]) + with open(tmp_path, 'rb') as fd: + actual_sha256 = hashlib.sha256(fd.read()).hexdigest() + if actual_sha256 != sha256: + raise Exception('Checksum mismatch for %s (actual: %s, expected: %s)' % + (url, actual_sha256, sha256)) + os.chmod(tmp_path, 0o755) + os.replace(tmp_path, bin_path) + with open(sha256_path, 'w') as f: + f.write(sha256) + return bin_path + + +def get_perfetto_prebuilt(manifest, soft_fail=False, arch=None): + """ Downloads the prebuilt, if necessary, and returns its path on disk. """ + plat = sys.platform.lower() + machine = platform.machine().lower() + manifest_entry = None + for entry in manifest: + # If the caller overrides the arch, just match that (for Android prebuilts). + if arch: + if entry.get('arch') == arch: + manifest_entry = entry + break + continue + # Otherwise guess the local machine arch. + if entry.get('platform') == plat and machine in entry.get('machine', []): + manifest_entry = entry + break + if manifest_entry is None: + if soft_fail: + return None + raise Exception( + ('No prebuilts available for %s-%s\n' % (plat, machine)) + + 'See https://perfetto.dev/docs/contributing/build-instructions') + + return download_or_get_cached( + file_name=manifest_entry['file_name'], + url=manifest_entry['url'], + sha256=manifest_entry['sha256']) + + +def run_perfetto_prebuilt(manifest): + bin_path = get_perfetto_prebuilt(manifest) + if sys.platform.lower() == 'win32': + sys.exit(subprocess.check_call([bin_path, *sys.argv[1:]])) + os.execv(bin_path, [bin_path] + sys.argv[1:]) + +# ----- Amalgamator: end of python/perfetto/prebuilts/perfetto_prebuilts.py + +if __name__ == '__main__': + run_perfetto_prebuilt(TRACE_PROCESSOR_SHELL_MANIFEST) + +#EOF