diff --git a/debug/tools/profiling_analyse/README.md b/debug/tools/profiling_analyse/README.md index 75b22562ce608ac2bfcb857f6911923b56e3620f..d871f019827de5a1fa99368962d0938c54cd4a98 100644 --- a/debug/tools/profiling_analyse/README.md +++ b/debug/tools/profiling_analyse/README.md @@ -33,14 +33,14 @@ gpu上的内存使用可以使用nvidia-smi查看,使用json文件分析时需 ### 调度占比 1、调度占比的求取需先计算调度耗时,调度占比=调度耗时/e2e耗时 * 100%。 2、调度耗时的计算方法有2种,①调度耗时=单步打屏时间-算子耗时-通信不可掩盖耗时,②调度耗时=e2e耗时-计算流执行任务总耗时。 -3、由于”单步打屏时间“需额外记录输入,增加可选输入字段“-ns”,作为用户的可选输入“单步打屏时间”,若无输入,该值使用e2e耗时替代。 +3、由于”单步打屏时间“需额外记录输入,增加可选输入字段“-nlt”,作为用户的可选输入“单步打屏时间”,若无输入,该值使用e2e耗时替代。 ### 内存 1、内存统计的数据来源于ASCEND_PROFILER_OUTPUT/memory_record.csv中的”Total Reserved(MB)“。 2、其值在模型训练趋于稳定时逐渐固定,整体偏差不大,因此输出结果为该列数据的最大值。 ## 样例 -- step1:获取gpu和npu的profiling数据,若没开启memory采集开关,则没有内存使用数据 +- step1:获取gpu和npu的profiling数据,若采集profiling数据时没开启memory采集开关,则没有内存使用数据 -- 运行命令:python profiling_parse.py -g gpu\gpu_trace_device0.json -gs 0.9 -n npu\xxx_ascend_pt -ns 1.2 +- 运行命令:python profiling_parse.py -g gpu\gpu_trace_device0.json -glt 0.9 -n npu\xxx_ascend_pt -nlt 1.2 - 输出结果:可以得到gpu与npu对照的打屏性能拆解数据 diff --git a/debug/tools/profiling_analyse/gpu_parser.py b/debug/tools/profiling_analyse/gpu_parser.py index 9d5ffbe4b407292412f0960bd2a523e71c8a1318..ffcee87edaaafd050b9720ab314895e4f6851481 100644 --- a/debug/tools/profiling_analyse/gpu_parser.py +++ b/debug/tools/profiling_analyse/gpu_parser.py @@ -7,7 +7,7 @@ import parser_helper class GpuProfilingParser: def __init__(self, args): self.trace_events = self.read_profiling_json_file(args.gpu) - self.one_step_time = args.gpu_step + self.one_step_time = args.gpu_log_time self.profiling_info = parser_helper.ProfilingInfo() @staticmethod diff --git a/debug/tools/profiling_analyse/npu_parser.py b/debug/tools/profiling_analyse/npu_parser.py index 7abd5acc04a8e10507b9c4cafa0fb8083be76c42..611343027b89643910ec7a93188c20c8f051091a 100644 --- a/debug/tools/profiling_analyse/npu_parser.py +++ b/debug/tools/profiling_analyse/npu_parser.py @@ -1,5 +1,6 @@ import sys import pandas as pd +from collections import defaultdict import parser_helper @@ -12,7 +13,10 @@ class NpuProfilingParser: self.npu_step_time = npu_step_time def parse_npu_json_events(self): - event_wait_sqe = {} + event_wait_sqe = defaultdict(float) + if not self.npu_json_file: + print('Npu trace json file is not available.') + return min_ts = sys.float_info.max max_ts = sys.float_info.min data = parser_helper.read_json_file(self.npu_json_file) @@ -20,8 +24,7 @@ class NpuProfilingParser: if dic.get('name') == 'EVENT_WAIT_SQE': args = dic.get('args') stream_id = args.get('Stream Id') - event_wait_sqe[stream_id] = (event_wait_sqe[stream_id] + dic.get('dur')) if \ - event_wait_sqe.get(stream_id) else dic.get('dur') + event_wait_sqe[stream_id] += dic.get('dur') if dic.get('ts'): ts = dic.get('ts') min_ts = ts if ts < min_ts else min_ts @@ -30,16 +33,18 @@ class NpuProfilingParser: self.profiling_info.communication_not_overlapped = event_wait_sqe.get(min(event_wait_sqe)) / 1000 / 1000 time_required = (self.profiling_info.cube_time + self.profiling_info.vector_time) + \ self.profiling_info.communication_not_overlapped - self.profiling_info.scheduling_time = (self.npu_step_time - time_required) if self.npu_step_time \ - else (self.profiling_info.e2e_time - time_required) + if self.npu_step_time: + self.profiling_info.scheduling_time = self.npu_step_time - time_required + else: + self.profiling_info.scheduling_time = self.profiling_info.e2e_time - time_required self.profiling_info.scheduling_ratio = self.profiling_info.scheduling_time / self.profiling_info.e2e_time def parse_npu_csv_events(self): - # 读取csv文件 + if not self.npu_summary_file: + print('Npu op summary csv file is not available.') + return info = pd.read_csv(self.npu_summary_file, index_col=None) - # 用一个字典保存各类算子的统计结果 op_statics_result = {} - # cube和vector时间 cube_time = 0.0 vec_time = 0.0 length = len(info['Model ID']) @@ -67,8 +72,14 @@ class NpuProfilingParser: op_statics_result[op_type] = [task_durations, 'cube'] else: op_statics_result[op_type][0] += task_durations - - info = pd.read_csv(self.npu_mem_file, usecols=['Total Reserved(MB)'], index_col=None) - self.profiling_info.memory_used = max(info.get('Total Reserved(MB)')) / 1024 + if not self.npu_mem_file: + print('Npu op memory csv file is not available.') + return + try: + info = pd.read_csv(self.npu_mem_file, usecols=['Total Reserved(MB)'], index_col=None) + except ValueError: + print('Npu profiling data does not contain memory info.') + else: + self.profiling_info.memory_used = max(info.get('Total Reserved(MB)')) / 1024 self.profiling_info.cube_time = cube_time / 10 ** 6 self.profiling_info.vector_time = vec_time / 10 ** 6 diff --git a/debug/tools/profiling_analyse/profiling_parse.py b/debug/tools/profiling_analyse/profiling_parse.py index d95553eb67a68ad667229910ea797403a0dbfc87..f3e891473728008dd0f070f080451b1c61ef8244 100644 --- a/debug/tools/profiling_analyse/profiling_parse.py +++ b/debug/tools/profiling_analyse/profiling_parse.py @@ -11,10 +11,10 @@ from parser_helper import ProfilingInfo def parse_command(): parser = argparse.ArgumentParser() parser.add_argument('-g', '--gpu', required=False, default='', metavar='(FILE)', help='Gpu profiling json file.') - parser.add_argument('-gs', '--gpu_step', required=False, default=0, type=float, help='Gpu one step time(s)') + parser.add_argument('-glt', '--gpu_log_time', required=False, default=0.0, type=float, help='Gpu one step time(s)') parser.add_argument('-n', '--npu', required=False, default='', metavar='(FILE)', help='Npu single core profiling root path.') - parser.add_argument('-ns', '--npu_step', required=False, default='', metavar='(FILE)', type=float, + parser.add_argument('-nlt', '--npu_log_time', required=False, default=0.0, metavar='(FILE)', type=float, help='Npu one step time(s).') return parser.parse_args() @@ -37,7 +37,7 @@ def show_table(gpu_profiling_info, npu_profiling_info): def parse_gpu(args): if args.gpu: - if args.gpu_step < 0: + if args.gpu_log_time < 0: raise ValueError("Gpu one step time shouldn't less than 0.") gpu_parser = GpuProfilingParser(args) gpu_parser.parse_events() @@ -47,16 +47,7 @@ def parse_gpu(args): def parse_npu(args, npu_path): - if not npu_path.get('trace_view'): - print('Npu trace json file is not available.') - return ProfilingInfo() - if not npu_path.get('op_summary'): - print('Npu op summary csv file is not available.') - return ProfilingInfo() - if not npu_path.get('memory_record'): - print('Npu op memory csv file is not available.') - return ProfilingInfo() - npu_parser = NpuProfilingParser(args.npu_step, npu_path) + npu_parser = NpuProfilingParser(args.npu_log_time, npu_path) npu_parser.parse_npu_csv_events() npu_parser.parse_npu_json_events() return npu_parser.profiling_info @@ -71,7 +62,7 @@ def main(): npu_path['trace_view'] = os.path.join(root, file) if file == 'memory_record.csv': npu_path['memory_record'] = os.path.join(root, file) - if 'op_summary' in file: + if 'op_summary_' in file: npu_path['op_summary'] = os.path.join(root, file) show_table(parse_gpu(args), parse_npu(args, npu_path))