diff --git a/profiler/merge_profiling_timeline/README.md b/profiler/merge_profiling_timeline/README.md index 22516739cd7d1a5d0654faae5bc1cea6c3e5e643..21e8ca5e26ad465f20be96cd8913158fc1e770b4 100644 --- a/profiler/merge_profiling_timeline/README.md +++ b/profiler/merge_profiling_timeline/README.md @@ -42,36 +42,41 @@ ascend pytorch profiler数据目录结构如下: 可选参数: -- -d: **必选参数**,profiling数据文件或文件夹路径 +- -i: **必选参数**,profiling数据文件或文件夹路径 - --type: **必选参数**,指定需要合并timeline场景,可选参数有:`pytorch`, `e2e`, `custom` - `pytorch`:通过ascend pytorch方式采集profiling数据,合并所有卡的trace_view.json - `e2e`:通过e2e方式采集profiling数据,优先合并总timeline,没有生成则选择合并device目录下的msprof_*.json - `custom` :自定义需要合并的timeline数据,具体参考示例 -- -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),默认为'-d'输入的路径 +- -o: 可选参数,指定合并后的timeline文件输出的路径(路径末尾可以设置文件名,具体用法参考示例),不设置该参数的情况下默认文件输出的路径为当前目录(默认文件名为merged.json) - --rank:可选参数,指定需要合并timeline的卡号,默认全部合并 -- --items:可选参数,指定需要合并的profiling数据项(python,Ascend Hardware,CANN,HCCL,PTA,Overlap Analysis),默认全部合并 +- --items:可选参数,指定需要合并的profiling数据项(python,Ascend_Hardware,CANN,HCCL,PTA,Overlap_Analysis),默认全部合并(item直接使用Ascend Hardware作为参数会被误认为是两个参数,因此作为一个参数时使用'_'连接) **使用示例**: -1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first_merge.json在path/to/cann_profiling/output/目录下(不设置-o参数时默认生成_merge.json在数据目录(path/to/cann_profiling/)下: +1、合并单机多卡timeline,默认合并所有卡、所有数据项,生成first.json在path/to/cann_profiling/output/目录下: ``` -python3 main.py -d path/to/cann_profiling/ -o path/to/cann_profiling/output/first --type pytorch +python3 main.py -i path/to/cann_profiling/ -o path/to/cann_profiling/output/first --type pytorch ``` +2、合并单机多卡timeline,默认合并所有卡、所有数据项,不设置-o参数时默认生成merged.json在当前目录下: -2、合并单机多卡timeline,只合并0卡和1卡: +``` +python3 main.py -i path/to/cann_profiling/ --type pytorch +``` + +3、合并单机多卡timeline,只合并0卡和1卡: ``` -python3 main.py -d path/to/cann_profiling/ -o path/to/cann_profiling/output/2p --type pytorch --rank 0,1 +python3 main.py -i path/to/cann_profiling/ -o path/to/cann_profiling/output/2p --type pytorch --rank 0,1 ``` 
-3、合并单机多卡timeline,合并所有卡的CANN层和Ascend_Hardware层数据 +4、合并单机多卡timeline,合并所有卡的CANN层和Ascend_Hardware层数据 ``` -python3 main.py -d path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Hardware +python3 main.py -i path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Hardware ``` -4、合并多timeline(自定义) +5、合并多timeline(自定义) 以上场景不支持的情况下,可以使用自定义的合并方式,将需要合并的timeline文件放在同一目录下(附:该场景比较特殊,与正常合并不同,无法直接读取info.json中的rank_id, 因此该场景下的rank_id为默认分配的序号,用于区分不同文件的相同层,不代表实际rank_id) 数据目录结构示意如下: @@ -81,11 +86,8 @@ python3 main.py -d path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Ha |- msprof_0.json |- msprof_1.json |- msprof_2.json - |- msprof_3.json - |- step_trace_0.json - |- step_trace_1.json - |- step_trace_2.json - |- step_trace_3.json + |- hccl_3.json + |- hccl_4.json ... ``` @@ -94,12 +96,12 @@ python3 main.py -d path/to/cann_profiling/ --type pytorch --items CANN,Ascend_Ha 通过下面的命令合并所有timeline,同样支持-o、--rank、--items等参数: ``` -python3 main.py -d path/to/timeline/ --type custom +python3 main.py -i path/to/timeline/ -o path/to/timeline/xxx --type custom ``` 合并timeline查看: -> 在 -o 指定的目录(默认在-d指定的目录下)的_merged.json为合并后的文件 +> 在 -o 指定的目录(不设置-o时默认在当前目录下的merged.json)的xxx.json为合并后的文件 ## 2 超大timeline文件查看 diff --git a/profiler/merge_profiling_timeline/main.py b/profiler/merge_profiling_timeline/main.py index 5ac4b89e0c8c25acd266d7b96ac40f7985bf89a1..7fd483a7361fc0b562f934cf3da6a969ef6f0c84 100644 --- a/profiler/merge_profiling_timeline/main.py +++ b/profiler/merge_profiling_timeline/main.py @@ -66,7 +66,7 @@ def get_timeline_info(args, prof_dirs): timeline_info = {} for prof in prof_dirs: - pro_path = os.path.join(args.data, prof) + pro_path = os.path.join(args.input, prof) # 从info.json读取rank_id rank_id = get_rank_id_from_info_json(pro_path) @@ -130,7 +130,7 @@ def get_rank_id_from_info_json(pro_path): def merge_timeline_general(args): """合并e2e profiling生成的msprof*.json""" - prof_dir = get_path_dir(args.data) + prof_dir = get_path_dir(args.input) timeline_info = get_timeline_info(args, 
prof_dir) timeline_files_dict = {} @@ -153,10 +153,10 @@ def merge_timeline_general(args): def merge_timeline_custom(args): """合并指定目录里所有timeline文件""" - timeline_files = natural_sort(os.listdir(args.data)) + timeline_files = natural_sort(os.listdir(args.input)) timeline_files_dict = {} for idx, timeline_file in enumerate(timeline_files): - timeline_files_dict[idx] = (os.path.join(args.data, timeline_file),0) + timeline_files_dict[idx] = (os.path.join(args.input, timeline_file),0) # 合并部分profiling items process_list = args.items.split(",") if args.items else None merge_timeline_events(timeline_files_dict, process_list) @@ -227,17 +227,23 @@ def merge_timeline_events(timeline_file_dict, process_list): event['id'] = float(event.get('id')) * RANK_ID_POS + rank_id new_events.append(event) - - out_path = f"{args.output}_merged.json" - with open(out_path, 'w') as f: - json.dump(new_events, f) + out_path = f"{args.output}.json" + if os.path.exists(out_path): + print(f"File {out_path} existed before and is now overwritten.") + os.remove(out_path) + try: + with open(out_path, 'w') as f: + json.dump(new_events, f) + except FileNotFoundError: + print(f"Param -o (output path) is not exists, please check it.") + return print(f"timeline merged output path: {out_path}") def parse_args(): parser = ArgumentParser(description="Merge timeline for multi card") - parser.add_argument("--data", "-d", default=None, help="root dir of PROF_* data") - parser.add_argument("--output", "-o", default=None, help="save path of msprof_merged.json ") + parser.add_argument("-i", "--input", default=None, help="root dir of PROF_* data") + parser.add_argument("-o", "--output", default="./merged", help="save path of merged.json ") parser.add_argument("--rank", default=None, help="List of ranks to be merged. By default, all ranks are merged") parser.add_argument("--items", default=None, help="Specify the data items (python,CANN,Ascend Hardware,HCCL,..)to be merged. 
in the timeline.") parser.add_argument("--type", choices=('pytorch', 'e2e', 'custom'), help="Customize the timeline file to be merged.") @@ -247,9 +253,6 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - - if not args.output: - args.output = args.data print("========================== start merge timeline ====================") if args.type == "custom": merge_timeline_custom(args)