diff --git a/docs/0.quickstart.md b/docs/0.quickstart.md
index cdde6f88bc38c341f96738f9b4f6fdfa82090e3e..5dca4ceccea7cfeb094e36f39510fb4318584a91 100644
--- a/docs/0.quickstart.md
+++ b/docs/0.quickstart.md
@@ -196,7 +196,7 @@ pip install protobuf==3.20.3
 ## Convert memory OOM data
 # Copy sysTrace_pb2.py into the convert directory so that it sits alongside the conversion script
 cp /sysTrace/protos/sysTrace_pb2.py /sysTrace/convert
-python /sysTrace/convert/convert_mem_to_flamegraph_for_cur.py 
+python /sysTrace/convert/convert_mem_to_flamegraph_for_cur.py
 
 ## Convert torch_npu data
 cd /home/sysTrace/timeline
diff --git a/systrace/convert/convert_io_to_timeline.py b/systrace/convert/convert_io_to_timeline.py
index 43f143c34472f9111eb36230c45020782bf6e04e..d64608bd494c366332e610c7d4306298962c12be 100644
--- a/systrace/convert/convert_io_to_timeline.py
+++ b/systrace/convert/convert_io_to_timeline.py
@@ -45,14 +45,14 @@ def process_io_file(input_path, trace_data):
         })
 
 
-def aggregate_io_files(output_path):
+def aggregate_io_files(input_dir, output_path):
     trace_data = {
         "traceEvents": [],
         "displayTimeUnit": "us",
         "metadata": {"format": "IO Profiler"}
     }
 
-    pb_files = glob.glob("*.pb")
+    pb_files = glob.glob(os.path.join(input_dir, "*.pb"))
     print(f"Found {len(pb_files)} .pb files to process")
 
     for pb_file in pb_files:
@@ -69,7 +69,8 @@ def aggregate_io_files(output_path):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Aggregate all *.pb files into a Chrome Trace JSON')
+    parser.add_argument('--input', default='.', help='Input directory containing .pb files (default: current directory)')
     parser.add_argument('--output', required=True, help='Output JSON file path')
     args = parser.parse_args()
 
-    aggregate_io_files(args.output)
\ No newline at end of file
+    aggregate_io_files(args.input, args.output)
\ No newline at end of file
diff --git a/systrace/convert/convert_mem_to_flamegraph.py b/systrace/convert/convert_mem_to_flamegraph.py
index a26da067abbd3b90a7f2b9bbd508cef5ce236330..244987125d623d8cf827d0f0bbfa4bb82edc8cad 100644
--- a/systrace/convert/convert_mem_to_flamegraph.py
+++ b/systrace/convert/convert_mem_to_flamegraph.py
@@ -298,16 +298,17 @@ class FixedFlameGraphConverter:
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Aggregate all *.pb files into a single JSON')
+    parser.add_argument('--input', default='.', help='Input directory containing .pb files (default: current directory)')
     parser.add_argument('--output', required=True, help='Output JSON file path')
     args = parser.parse_args()
     output_path = args.output
 
     all_events = []
-    pb_files = glob.glob("*.pb")
+    pb_files = glob.glob(os.path.join(args.input, "*.pb"))
 
     for i, _ in enumerate(pb_files):
         processed_files[i] = False
-    for pb_file in glob.glob("*.pb"):
+    for pb_file in pb_files:
         print(f"Processing {pb_file}")
         converter = FixedFlameGraphConverter()
         tmp_output = f"{os.path.splitext(pb_file)[0]}_tmp.json"
@@ -324,7 +325,7 @@
         "displayTimeUnit": "ns",
         "metadata": {
             "format": "FixedFlameGraph (Aggregated)",
-            "source_files": glob.glob("*.pb")
+            "source_files": pb_files
         }
     }, f, indent=2)
 
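The converter changes above converge on one CLI shape: an optional --input directory that the *.pb glob is resolved against, plus the already-required --output. A minimal sketch of that shared pattern, using only what the diff itself adds (the collect_pb_files helper name is illustrative, not part of the patch):

import argparse
import glob
import os

def collect_pb_files(input_dir):
    # Resolve *.pb against --input rather than the process's current
    # working directory, as the patched converters now do.
    return glob.glob(os.path.join(input_dir, "*.pb"))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Aggregate all *.pb files into a single JSON')
    parser.add_argument('--input', default='.',
                        help='Input directory containing .pb files (default: current directory)')
    parser.add_argument('--output', required=True, help='Output JSON file path')
    args = parser.parse_args()

    pb_files = collect_pb_files(args.input)
    print(f"Found {len(pb_files)} .pb files to process")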
diff --git a/systrace/convert/conver_osprobe_to_timeline.py b/systrace/convert/convert_osprobe_to_timeline.py
similarity index 82%
rename from systrace/convert/conver_osprobe_to_timeline.py
rename to systrace/convert/convert_osprobe_to_timeline.py
index 508a7c42fe8f2ac6407f9f7b8609fea5fab7855a..522225799854334c5b90ba91ba345bdf6c8eec5d 100644
--- a/systrace/convert/conver_osprobe_to_timeline.py
+++ b/systrace/convert/convert_osprobe_to_timeline.py
@@ -4,6 +4,7 @@
 import glob
 import json
 import argparse
 import systrace_pb2
+import os
 from collections import defaultdict
 event_type_dic = {
@@ -32,13 +33,13 @@ def process_single_file(input_path):
         return []
 
     for entry in osprobe_data.OSprobe_entries:
-        if entry.OS_event_type in [1, 2, 3]: 
+        if entry.OS_event_type in [1, 2, 3]:
            cpu_trace_events.append({
                 "name": event_type_dic[entry.OS_event_type],
                 "cat": "osprobe",
                 "ph": "X",
                 "pid": entry.rank if entry.OS_event_type in [0, 4] else f"Rank: {entry.rank} CPU: {entry.key}",
-                "tid": f"{entry.comm}: {entry.key}" if entry.OS_event_type in [0, 4] else entry.key ,
+                "tid": f"{entry.comm}: {entry.key}" if entry.OS_event_type in [0, 4] else entry.key,
                 "ts": entry.start_us,
                 "dur": entry.dur,
                 "args": {
@@ -82,7 +83,7 @@ def process_single_file(input_path):
 
     return trace_events, cpu_trace_events
 
-def aggregate_timeline_files(output_path):
+def aggregate_timeline_files(input_dir, output_path):
     trace_data = {
         "traceEvents": [],
         "displayTimeUnit": "ns",
@@ -94,26 +95,29 @@ def aggregate_timeline_files(output_path):
         "displayTimeUnit": "ns",
         "metadata": {"format": "eBPF OSProbe"}
     }
-    timeline_files = glob.glob("*.pb")
-    print(f"Found {len(timeline_files)} timeline files.")
+
+    timeline_files = glob.glob(os.path.join(input_dir, "*.pb"))
+    print(f"Found {len(timeline_files)} timeline files in {input_dir}")
 
     with Pool(processes=cpu_count()) as pool:
-        # tqdm with imap_unordered to display progress
-        for result, cpu_result in tqdm(pool.imap_unordered(process_single_file, timeline_files), total=len(timeline_files), desc="Processing .pb files"):
+        for result, cpu_result in tqdm(pool.imap_unordered(process_single_file, timeline_files),
+                                       total=len(timeline_files),
+                                       desc="Processing .pb files"):
             trace_data["traceEvents"].extend(result)
             cpu_trace_data["traceEvents"].extend(cpu_result)
 
     with open(output_path, "w") as f:
         json.dump(trace_data, f, indent=None, separators=(',', ':'))
 
-    with open(f"cpu_{output_path}", "w") as f:
+    with open(f"{output_path}_cpu", "w") as f:
         json.dump(cpu_trace_data, f, indent=None, separators=(',', ':'))
 
     print(f"Aggregated {len(trace_data['traceEvents'])} events to {output_path}")
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Aggregate all *.timeline files into a single JSON')
+    parser = argparse.ArgumentParser(description='Aggregate all *.pb files into JSON traces')
+    parser.add_argument('--input', default='.', help='Input directory containing .pb files (default: current directory)')
     parser.add_argument('--output', required=True, help='Output JSON file path')
     args = parser.parse_args()
-    aggregate_timeline_files(args.output)
\ No newline at end of file
+    aggregate_timeline_files(args.input, args.output)
\ No newline at end of file
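The reflowed loop in convert_osprobe_to_timeline.py keeps its existing fan-out unchanged: a Pool sized to cpu_count(), imap_unordered so results merge as soon as any worker finishes, and tqdm for progress. A self-contained sketch of that pattern, with an illustrative parse_one worker standing in for process_single_file:

from multiprocessing import Pool, cpu_count
from tqdm import tqdm

def parse_one(path):
    # Stand-in for process_single_file: returns the pair
    # (trace_events, cpu_trace_events) parsed from one .pb file.
    return [{"name": path}], []

if __name__ == "__main__":
    files = ["rank0.pb", "rank1.pb", "rank2.pb"]
    trace_events, cpu_trace_events = [], []
    with Pool(processes=cpu_count()) as pool:
        # imap_unordered yields results in completion order, so tqdm can
        # advance without waiting on the slowest file.
        for result, cpu_result in tqdm(pool.imap_unordered(parse_one, files),
                                       total=len(files),
                                       desc="Processing .pb files"):
            trace_events.extend(result)
            cpu_trace_events.extend(cpu_result)
    print(f"Aggregated {len(trace_events)} events")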
diff --git a/systrace/convert/convert_pytorch_to_timeline.py b/systrace/convert/convert_pytorch_to_timeline.py
index bb42d8b2d80767786166a7cefc17fb22056017bf..24505a9b58ca7cd6406324fd5b3f543050444ac0 100644
--- a/systrace/convert/convert_pytorch_to_timeline.py
+++ b/systrace/convert/convert_pytorch_to_timeline.py
@@ -2,6 +2,7 @@ import json
 import systrace_pb2
 import argparse
 import glob
+import os
 
 def process_timeline_file(input_path, trace_data):
     with open(input_path, "rb") as f:
@@ -26,14 +27,17 @@ def process_timeline_file(input_path, trace_data):
         }
     })
 
-def aggregate_timeline_files(output_path):
+def aggregate_timeline_files(input_dir, output_path):
     trace_data = {
         "traceEvents": [],
         "displayTimeUnit": "ns",
         "metadata": {"format": "Pytorch Profiler"}
     }
 
-    for timeline_file in glob.glob("*timeline"):
+    timeline_files = glob.glob(os.path.join(input_dir, "*timeline"))
+    print(f"Found {len(timeline_files)} timeline files to process")
+
+    for timeline_file in timeline_files:
         print(f"Processing {timeline_file}")
         process_timeline_file(timeline_file, trace_data)
 
@@ -45,6 +49,7 @@ def aggregate_timeline_files(output_path):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Aggregate all *.timeline files into a single JSON')
+    parser.add_argument('--input', default='.', help='Input directory containing timeline files (default: current directory)')
     parser.add_argument('--output', required=True, help='Output JSON file path')
     args = parser.parse_args()
-    aggregate_timeline_files(args.output)
\ No newline at end of file
+    aggregate_timeline_files(args.input, args.output)
\ No newline at end of file
diff --git a/systrace/src/cann/common_hook.c b/systrace/src/cann/common_hook.c
index 8d8410ecc7add04f66f08c4c15403745368209dd..7a401a3a22e06cdd34073ec095031889220a4dc5 100644
--- a/systrace/src/cann/common_hook.c
+++ b/systrace/src/cann/common_hook.c
@@ -4,8 +4,6 @@
 #include
 #include
 
-#define PATH_MAX 100
-
 uint64_t get_current_us() {
     struct timeval tv;
     gettimeofday(&tv, NULL);
@@ -38,7 +36,7 @@ void get_log_filename(char *buf, size_t buf_size, const char *path_suffix) {
     const char *rank_str = getenv("RANK") ? getenv("RANK") : getenv("RANK_ID");
     int rank = rank_str ? atoi(rank_str) : 0;
 
-    char path[PATH_MAX] = {0};
+    char path[PATH_LEN] = {0};
     int ret = snprintf(path, sizeof(path), "%s/%s", SYS_TRACE_ROOT_DIR, path_suffix);
     if (ret < 0 || (size_t)ret >= sizeof(path)) {
         snprintf(buf, buf_size, "%s_trace_rank%d.pb", path_suffix, rank);
diff --git a/systrace/src/cann/common_hook.h b/systrace/src/cann/common_hook.h
index 0bae9beff559d8cd1ec96398fe3ec019ba61fdb9..56b34dd4f6571f5874a6934023113f7c3e1c9e54 100644
--- a/systrace/src/cann/common_hook.h
+++ b/systrace/src/cann/common_hook.h
@@ -22,6 +22,7 @@
 
 #define LOG_INTERVAL_SEC 5
 #define LOG_ITEMS_MIN 10
+#define PATH_LEN 256
 
 uint64_t get_current_us();
 const char *get_so_name(uint64_t ip);
diff --git a/systrace/src/cann/mem_hook.c b/systrace/src/cann/mem_hook.c
index f9cda8fa55aeb51a6c5612905c01f65777f6cc95..6dcc3f50171c839311156411084512945dffc8ca 100644
--- a/systrace/src/cann/mem_hook.c
+++ b/systrace/src/cann/mem_hook.c
@@ -192,7 +192,7 @@ static void write_protobuf_to_file()
 
     if (pthread_mutex_trylock(&file_mutex) == 0) { // pthread_mutex_trylock or pthread_mutex_lock
         char filename[256];
-        get_log_filename(filename, sizeof(filename), "mem_trace");
+        get_log_filename(filename, sizeof(filename), "hbm_trace");
 
         size_t len = proc_mem__get_packed_size(td->proc_mem);
         buf = malloc(len);
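With every converter now accepting --input and --output, one directory of traces can be converted end to end. A hedged driver sketch: the script-to-output mapping below is illustrative, and nothing beyond the flags this patch introduces is assumed:

import subprocess
import sys

# Illustrative mapping; adjust script paths and output names per setup.
CONVERTERS = {
    "convert_io_to_timeline.py": "io_timeline.json",
    "convert_osprobe_to_timeline.py": "osprobe_timeline.json",
    "convert_mem_to_flamegraph.py": "mem_flamegraph.json",
    "convert_pytorch_to_timeline.py": "pytorch_timeline.json",
}

def run_all(trace_dir):
    for script, output in CONVERTERS.items():
        # Every converter in this patch takes --input (trace directory,
        # default '.') and a required --output JSON path.
        subprocess.run([sys.executable, script,
                        "--input", trace_dir,
                        "--output", output], check=True)

if __name__ == "__main__":
    run_all(sys.argv[1] if len(sys.argv) > 1 else ".")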