From e47c2ef6ba00b0f4d0f8e05e4e132229b40d5ae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Sat, 26 Aug 2023 10:56:06 +0000 Subject: [PATCH 1/8] update profiler/merge_profiling_timeline/main.py. change default merge.json name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/main.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index 5ac4b89e0c..7a80a21547 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -227,8 +227,10 @@ def merge_timeline_events(timeline_file_dict, process_list): event['id'] = float(event.get('id')) * RANK_ID_POS + rank_id new_events.append(event) - - out_path = f"{args.output}_merged.json" + if args.output == args.data: + out_path = f"{args.output}merged.json" + else: + out_path = f"{args.output}.json" with open(out_path, 'w') as f: json.dump(new_events, f) print(f"timeline merged output path: {out_path}") @@ -237,7 +239,7 @@ def merge_timeline_events(timeline_file_dict, process_list): def parse_args(): parser = ArgumentParser(description="Merge timeline for multi card") parser.add_argument("--data", "-d", default=None, help="root dir of PROF_* data") - parser.add_argument("--output", "-o", default=None, help="save path of msprof_merged.json ") + parser.add_argument("--output", "-o", default=None, help="save path of merged.json ") parser.add_argument("--rank", default=None, help="List of ranks to be merged. By default, all ranks are merged") parser.add_argument("--items", default=None, help="Specify the data items (python,CANN,Ascend Hardware,HCCL,..)to be merged. 
in the timeline.") parser.add_argument("--type", choices=('pytorch', 'e2e', 'custom'), help="Customize the timeline file to be merged.") -- Gitee From a7ffa5993432dbb5ef386c1ce5f4125158d4dc6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Sat, 26 Aug 2023 10:57:59 +0000 Subject: [PATCH 2/8] =?UTF-8?q?update=20profiler/merge=5Fprofiling=5Ftimel?= =?UTF-8?q?ine/README.md.=20=E4=BF=AE=E6=94=B9=E6=96=87=E6=A1=A3=E8=AF=B4?= =?UTF-8?q?=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/merge_profiling_timeline/README.md b/profiler/merge_profiling_timeline/README.md index 22516739cd..b9054a5f49 100644 --- a/profiler/merge_profiling_timeline/README.md +++ b/profiler/merge_profiling_timeline/README.md @@ -53,7 +53,7 @@ ascend pytorch profiler数据目录结构如下: **使用示例**: -1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first_merge.json在path/to/cann_profiling/output/目录下(不设置-o参数时默认生成_merge.json在数据目录(path/to/cann_profiling/)下: +1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first.json在path/to/cann_profiling/output/目录下(不设置-o参数时默认生成merge.json在数据目录(path/to/cann_profiling/)下: ``` python3 main.py -d path/to/cann_profiling/ -o path/to/cann_profiling/output/first --type pytorch -- Gitee From 6525c6f37a2d2ce1bb43179bf67dc846584400a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Mon, 28 Aug 2023 12:35:06 +0000 Subject: [PATCH 3/8] update profiler/merge_profiling_timeline/main.py. 
update default directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/main.py | 30 +++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index 7a80a21547..27b31c4888 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -66,7 +66,7 @@ def get_timeline_info(args, prof_dirs): timeline_info = {} for prof in prof_dirs: - pro_path = os.path.join(args.data, prof) + pro_path = os.path.join(args.input, prof) # 从info.json读取rank_id rank_id = get_rank_id_from_info_json(pro_path) @@ -130,7 +130,7 @@ def get_rank_id_from_info_json(pro_path): def merge_timeline_general(args): """合并e2e profiling生成的msprof*.json""" - prof_dir = get_path_dir(args.data) + prof_dir = get_path_dir(args.input) timeline_info = get_timeline_info(args, prof_dir) timeline_files_dict = {} @@ -153,10 +153,10 @@ def merge_timeline_general(args): def merge_timeline_custom(args): """合并指定目录里所有timeline文件""" - timeline_files = natural_sort(os.listdir(args.data)) + timeline_files = natural_sort(os.listdir(args.input)) timeline_files_dict = {} for idx, timeline_file in enumerate(timeline_files): - timeline_files_dict[idx] = (os.path.join(args.data, timeline_file),0) + timeline_files_dict[idx] = (os.path.join(args.input, timeline_file),0) # 合并部分profiling items process_list = args.items.split(",") if args.items else None merge_timeline_events(timeline_files_dict, process_list) @@ -227,19 +227,23 @@ def merge_timeline_events(timeline_file_dict, process_list): event['id'] = float(event.get('id')) * RANK_ID_POS + rank_id new_events.append(event) - if args.output == args.data: - out_path = f"{args.output}merged.json" - else: - out_path = f"{args.output}.json" - with open(out_path, 'w') as f: - json.dump(new_events, f) + out_path = f"{args.output}.json" + 
if os.path.exists(output_path): + print(f"File {out_path} existed before and is now overwritten.") + os.remove(out_path) + try: + with open(out_path, 'w') as f: + json.dump(new_events, f) + except FileNotFoundError: + print(f"Param -o (output path) is not exists, please check it.") + return print(f"timeline merged output path: {out_path}") def parse_args(): parser = ArgumentParser(description="Merge timeline for multi card") - parser.add_argument("--data", "-d", default=None, help="root dir of PROF_* data") - parser.add_argument("--output", "-o", default=None, help="save path of merged.json ") + parser.add_argument("-i", "--input", default=None, help="root dir of PROF_* data") + parser.add_argument("-o", "--output", default=None, help="save path of merged.json ") parser.add_argument("--rank", default=None, help="List of ranks to be merged. By default, all ranks are merged") parser.add_argument("--items", default=None, help="Specify the data items (python,CANN,Ascend Hardware,HCCL,..)to be merged. 
in the timeline.") parser.add_argument("--type", choices=('pytorch', 'e2e', 'custom'), help="Customize the timeline file to be merged.") @@ -251,7 +255,7 @@ if __name__ == "__main__": args = parse_args() if not args.output: - args.output = args.data + args.output = "./merged" print("========================== start merge timeline ====================") if args.type == "custom": merge_timeline_custom(args) -- Gitee From 2f5fea6e3f627e34cc8c858d90331e19300b9ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Mon, 28 Aug 2023 12:42:05 +0000 Subject: [PATCH 4/8] =?UTF-8?q?update=20profiler/merge=5Fprofiling=5Ftimel?= =?UTF-8?q?ine/README.md.=20=E6=9B=B4=E6=96=B0=E6=96=87=E6=A1=A3=E6=8F=8F?= =?UTF-8?q?=E8=BF=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/README.md | 23 +++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/profiler/merge_profiling_timeline/README.md b/profiler/merge_profiling_timeline/README.md index b9054a5f49..dd03f86f01 100644 --- a/profiler/merge_profiling_timeline/README.md +++ b/profiler/merge_profiling_timeline/README.md @@ -42,33 +42,33 @@ ascend pytorch profiler数据目录结构如下: 可选参数: -- -d: **必选参数**,profiling数据文件或文件夹路径 +- -i: **必选参数**,profiling数据文件或文件夹路径 - --type: **必选参数**,指定需要合并timeline场景,可选参数有:`pytorch`, `e2e`, `custom` - `pytorch`:通过ascend pytorch方式采集profiling数据,合并所有卡的trace_view.json - `e2e`:通过e2e方式采集profiling数据,优先合并总timeline,没有生成则选择合并device目录下的msprof_*.json - `custom` :自定义需要合并的timeline数据,具体参考示例 -- -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),默认为'-d'输入的路径 +- -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),默认为当前目录 - --rank:可选参数,指定需要合并timeline的卡号,默认全部合并 - --items:可选参数,指定需要合并的profiling数据项(python,Ascend Hardware,CANN,HCCL,PTA,Overlap Analysis),默认全部合并 **使用示例**: 
-1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first.json在path/to/cann_profiling/output/目录下(不设置-o参数时默认生成merge.json在数据目录(path/to/cann_profiling/)下: +1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first.json在path/to/cann_profiling/output/目录下(不设置-o参数时默认生成merge.json在当前目录下: ``` -python3 main.py -d path/to/cann_profiling/ -o path/to/cann_profiling/output/first --type pytorch +python3 main.py -i path/to/cann_profiling/ -o path/to/cann_profiling/output/first --type pytorch ``` 2、合并单机多卡timeline,只合并0卡和1卡: ``` -python3 main.py -d path/to/cann_profiling/ -o path/to/cann_profiling/output/2p --type pytorch --rank 0,1 +python3 main.py -i path/to/cann_profiling/ -o path/to/cann_profiling/output/2p --type pytorch --rank 0,1 ``` 3、合并单机多卡timeline,合并所有卡的CANN层和Ascend_Hardware层数据 ``` -python3 main.py -d path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Hardware +python3 main.py -i path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Hardware ``` 4、合并多timeline(自定义) @@ -81,11 +81,8 @@ python3 main.py -d path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Ha |- msprof_0.json |- msprof_1.json |- msprof_2.json - |- msprof_3.json - |- step_trace_0.json - |- step_trace_1.json - |- step_trace_2.json - |- step_trace_3.json + |- hccl_3.json + |- hccl_4.json ... 
``` @@ -94,12 +91,12 @@ python3 main.py -d path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Ha 通过下面的命令合并所有timeline,同样支持-o、--rank、--items等参数: ``` -python3 main.py -d path/to/timeline/ --type custom +python3 main.py -i path/to/timeline/ -o path/to/timeline/xxx --type custom ``` 合并timeline查看: -> 在 -o 指定的目录(默认在-d指定的目录下)的_merged.json为合并后的文件 +> 在 -o 指定的目录(不设置-o时默认在当前目录下的merged.json)的xxx.json为合并后的文件 ## 2 超大timeline文件查看 -- Gitee From 91e69abf85143fa53f7d261d6bb2718952725513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Mon, 28 Aug 2023 12:49:30 +0000 Subject: [PATCH 5/8] =?UTF-8?q?update=20profiler/merge=5Fprofiling=5Ftimel?= =?UTF-8?q?ine/README.md.=20=E6=96=87=E6=A1=A3=E8=A1=A5=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/merge_profiling_timeline/README.md b/profiler/merge_profiling_timeline/README.md index dd03f86f01..53c0d6a4a1 100644 --- a/profiler/merge_profiling_timeline/README.md +++ b/profiler/merge_profiling_timeline/README.md @@ -49,7 +49,7 @@ ascend pytorch profiler数据目录结构如下: - `custom` :自定义需要合并的timeline数据,具体参考示例 - -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),默认为当前目录 - --rank:可选参数,指定需要合并timeline的卡号,默认全部合并 -- --items:可选参数,指定需要合并的profiling数据项(python,Ascend Hardware,CANN,HCCL,PTA,Overlap Analysis),默认全部合并 +- --items:可选参数,指定需要合并的profiling数据项(python,Ascend_Hardware,CANN,HCCL,PTA,Overlap_Analysis),默认全部合并(item直接使用Ascend Hardware作为参数会被误认为是两个参数,因此作为一个参数时使用'_'连接) **使用示例**: -- Gitee From 089b5711a7b7775bff9c9b89a625fb911807d0b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Mon, 28 Aug 2023 12:53:12 +0000 Subject: [PATCH 6/8] update profiler/merge_profiling_timeline/main.py. 
change default output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/main.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index 27b31c4888..3d1bdaef90 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -243,7 +243,7 @@ def merge_timeline_events(timeline_file_dict, process_list): def parse_args(): parser = ArgumentParser(description="Merge timeline for multi card") parser.add_argument("-i", "--input", default=None, help="root dir of PROF_* data") - parser.add_argument("-o", "--output", default=None, help="save path of merged.json ") + parser.add_argument("-o", "--output", default="./merged", help="save path of merged.json ") parser.add_argument("--rank", default=None, help="List of ranks to be merged. By default, all ranks are merged") parser.add_argument("--items", default=None, help="Specify the data items (python,CANN,Ascend Hardware,HCCL,..)to be merged. in the timeline.") parser.add_argument("--type", choices=('pytorch', 'e2e', 'custom'), help="Customize the timeline file to be merged.") @@ -253,9 +253,6 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - - if not args.output: - args.output = "./merged" print("========================== start merge timeline ====================") if args.type == "custom": merge_timeline_custom(args) -- Gitee From ab9db51e0a890b5c5ee484455904567697d2d484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Mon, 28 Aug 2023 12:58:03 +0000 Subject: [PATCH 7/8] update profiler/merge_profiling_timeline/main.py. 
change default path name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index 3d1bdaef90..7fd483a736 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -228,7 +228,7 @@ def merge_timeline_events(timeline_file_dict, process_list): new_events.append(event) out_path = f"{args.output}.json" - if os.path.exists(output_path): + if os.path.exists(out_path): print(f"File {out_path} existed before and is now overwritten.") os.remove(out_path) try: -- Gitee From a436512aed2470d4a4213e790ca135b849670ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=96=8C?= Date: Tue, 29 Aug 2023 01:27:13 +0000 Subject: [PATCH 8/8] =?UTF-8?q?update=20profiler/merge=5Fprofiling=5Ftimel?= =?UTF-8?q?ine/README.md.=20=E5=AE=8C=E5=96=84md=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 李斌 --- profiler/merge_profiling_timeline/README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/profiler/merge_profiling_timeline/README.md b/profiler/merge_profiling_timeline/README.md index 53c0d6a4a1..21e8ca5e26 100644 --- a/profiler/merge_profiling_timeline/README.md +++ b/profiler/merge_profiling_timeline/README.md @@ -47,31 +47,36 @@ ascend pytorch profiler数据目录结构如下: - `pytorch`:通过ascend pytorch方式采集profiling数据,合并所有卡的trace_view.json - `e2e`:通过e2e方式采集profiling数据,优先合并总timeline,没有生成则选择合并device目录下的msprof_*.json - `custom` :自定义需要合并的timeline数据,具体参考示例 -- -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),默认为当前目录 +- -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),不设置该参数的情况下默认文件输出的路径为当前目录(默认文件名为merged.json) - --rank:可选参数,指定需要合并timeline的卡号,默认全部合并 - 
--items:可选参数,指定需要合并的profiling数据项(python,Ascend_Hardware,CANN,HCCL,PTA,Overlap_Analysis),默认全部合并(item直接使用Ascend Hardware作为参数会被误认为是两个参数,因此作为一个参数时使用'_'连接) **使用示例**: -1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first.json在path/to/cann_profiling/output/目录下(不设置-o参数时默认生成merge.json在当前目录下: +1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first.json在path/to/cann_profiling/output/目录下: ``` python3 main.py -i path/to/cann_profiling/ -o path/to/cann_profiling/output/first --type pytorch ``` +2、合并单机多卡timeline,默认合并所有卡、所有数据项,不设置-o参数时默认生成merged.json在当前目录下: -2、合并单机多卡timeline,只合并0卡和1卡: +``` +python3 main.py -i path/to/cann_profiling/ --type pytorch +``` + +3、合并单机多卡timeline,只合并0卡和1卡: ``` python3 main.py -i path/to/cann_profiling/ -o path/to/cann_profiling/output/2p --type pytorch --rank 0,1 ``` -3、合并单机多卡timeline,合并所有卡的CANN层和Ascend_Hardware层数据 +4、合并单机多卡timeline,合并所有卡的CANN层和Ascend_Hardware层数据 ``` python3 main.py -i path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Hardware ``` -4、合并多timeline(自定义) +5、合并多timeline(自定义) 以上场景不支持的情况下,可以使用自定义的合并方式,将需要合并的timeline文件放在同一目录下(附:该场景比较特殊,与正常合并不同,无法直接读取info.json中的rank_id, 因此该场景下的rank_id为默认分配的序号,用于区分不同文件的相同层,不代表实际rank_id) 数据目录结构示意如下: -- Gitee