diff --git a/systrace/CMakeLists.txt b/systrace/CMakeLists.txt index 9d90576b805d8fd45c2479771b747da2c8b38dbc..bcefed6b1691b7ffd145af2a62db95ff58d850de 100644 --- a/systrace/CMakeLists.txt +++ b/systrace/CMakeLists.txt @@ -44,6 +44,7 @@ target_include_directories(common PUBLIC ${PROJECT_SOURCE_DIR}/include ${Python3 add_subdirectory(protos) +add_subdirectory(src/os) add_library(sysTrace_hook SHARED ${PROJECT_SOURCE_DIR}/include/common/shared_constants.c @@ -70,6 +71,7 @@ target_link_libraries(sysTrace_hook protobuf::libprotobuf ${CMAKE_THREAD_LIBS} pthread + os_probe jsoncpp -ldl ) diff --git a/systrace/convert/conver_osprobe_to_timeline.py b/systrace/convert/conver_osprobe_to_timeline.py new file mode 100644 index 0000000000000000000000000000000000000000..9d7924293583c3e2e4f0019e8d99efbd39b01b38 --- /dev/null +++ b/systrace/convert/conver_osprobe_to_timeline.py @@ -0,0 +1,62 @@ +import json +import systrace_pb2 +import argparse +import glob + +event_type_dic = { + 0: "mm_fault", + 1: "swap_page", + 2: "compaction", + 3: "vmscan", + 4: "offcpu", + 5: "unknown" + } + +def process_timeline_file(input_path, trace_data): + last_delay = {} + delay = 0; + with open(input_path, "rb") as f: + osprobe_data = systrace_pb2.OSprobe() + osprobe_data.ParseFromString(f.read()) + + for entry in osprobe_data.OSprobe_entries: + if entry.OS_event_type == 4: + if f"{entry.comm}: {entry.key}" in last_delay: + delay = entry.rundelay - last_delay[f"{entry.comm}: {entry.key}"] + last_delay[f"{entry.comm}: {entry.key}"] = entry.rundelay + + trace_data["traceEvents"].append({ + "name": event_type_dic[entry.OS_event_type], + "cat": "osprobe", + "ph": "X", + "pid": entry.rank if entry.OS_event_type in [0, 4] else f"CPU: {entry.key}", + "tid": f"{entry.comm}: {entry.key}" if entry.OS_event_type in [0, 4] else entry.key , + "ts": entry.start_us, + "dur": entry.dur, + "args": { + "cpu_rundelay": delay + } if entry.OS_event_type == 4 else {} + }) + +def aggregate_timeline_files(output_path): + 
trace_data = { + "traceEvents": [], + "displayTimeUnit": "ns", + "metadata": {"format": "eBPF OSProbe"} + } + + for timeline_file in glob.glob("*.pb"): + print(f"Processing {timeline_file}") + process_timeline_file(timeline_file, trace_data) + + # trace_data["traceEvents"].sort(key=lambda x: x["args"]["stage_id"]) + + with open(output_path, "w") as f: + json.dump(trace_data, f, indent=None, separators=(',', ':')) + print(f"Aggregated {len(trace_data['traceEvents'])} events to {output_path}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Aggregate all *.timeline files into a single JSON') + parser.add_argument('--output', required=True, help='Output JSON file path') + args = parser.parse_args() + aggregate_timeline_files(args.output) \ No newline at end of file diff --git a/systrace/protos/systrace.proto b/systrace/protos/systrace.proto index aa0e8f71b85f36be112326aff86ada54bbd19bb4..debbaa629cb6c0f55736bd44ae1a7064b7073077 100644 --- a/systrace/protos/systrace.proto +++ b/systrace/protos/systrace.proto @@ -59,4 +59,18 @@ message Pytorch { message Mem { repeated ProcMem proc_mem = 1; +} + +message OSprobe { + repeated OSprobeEntry OSprobe_entries = 1; +} + +message OSprobeEntry { + uint32 key = 1; + uint64 start_us = 2; + uint64 dur = 3; + uint64 rundelay = 4; + uint32 OS_event_type = 5; + uint32 rank = 6; + string comm = 7; } \ No newline at end of file diff --git a/systrace/src/os/CMakeLists.txt b/systrace/src/os/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a43ac7eb040c816971c3526873bedd6c7a6b7d60 --- /dev/null +++ b/systrace/src/os/CMakeLists.txt @@ -0,0 +1,66 @@ +cmake_minimum_required(VERSION 3.10) +project(os_probe) +set(CMAKE_VERBOSE_MAKEFILE ON) + +# Set compiler flags +set(CMAKE_C_FLAGS "-g -O2 -Wall -fPIC -std=gnu11") +set(CMAKE_CXX_FLAGS "-std=c++11 -g -O2 -Wall -fPIC") +set(BPFTOOL bpftool) +# Find required libraries +find_package(PkgConfig REQUIRED) +pkg_check_modules(LIBBPF REQUIRED 
libbpf) +pkg_check_modules(PROTOBUF_C REQUIRED libprotobuf-c) + +# BPF compilation +set(BPF_SOURCES + ${PROJECT_SOURCE_DIR}/os_cpu.bpf.c + ${PROJECT_SOURCE_DIR}/os_mem.bpf.c +) + +add_custom_target(ebpf_kern + COMMAND bpftool btf dump file /sys/kernel/btf/vmlinux format c > ${PROJECT_SOURCE_DIR}/vmlinux.h +) + +foreach(bpf_src ${BPF_SOURCES}) + get_filename_component(bpf_name ${bpf_src} NAME_WE) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/${bpf_name}.bpf.o + COMMAND clang -target bpf -g -O2 -I${PROJECT_SOURCE_DIR} -c ${bpf_src} -o ${PROJECT_SOURCE_DIR}/${bpf_name}.bpf.o + DEPENDS ${bpf_src} + COMMENT "Building BPF object ${bpf_name}" + ) + list(APPEND BPF_OBJECTS ${PROJECT_SOURCE_DIR}/${bpf_name}.bpf.o) +endforeach() + +foreach(bpf_obj ${BPF_OBJECTS}) + get_filename_component(bpf_obj_name ${bpf_obj} NAME_WE) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/${bpf_obj_name}.skel.h + COMMAND ${BPFTOOL} gen skeleton ${bpf_obj} > ${PROJECT_SOURCE_DIR}/${bpf_obj_name}.skel.h + DEPENDS ${bpf_obj} ${BPF_OBJECTS} + ) + list(APPEND BPF_SKEL_H ${PROJECT_SOURCE_DIR}/${bpf_obj_name}.skel.h) +endforeach() + +add_custom_target(generate_skel_h_${PROJECT_NAME} ALL DEPENDS ebpf_kern ${BPF_OBJECTS} ${BPF_SKEL_H}) + +# Main source files +set(SOURCES + os_probe.c +) + +# Create shared library +add_library(os_probe OBJECT ${SOURCES}) +add_dependencies(os_probe generate_skel_h_${PROJECT_NAME}) + +# Link libraries +target_link_libraries(os_probe + ${LIBBPF_LIBRARIES} + ${PROTOBUF_C_LIBRARIES} + pthread + z + stdc++ +) + +# Install +install(TARGETS os_probe LIBRARY DESTINATION lib) diff --git a/systrace/src/os/__bpf_kern.h b/systrace/src/os/__bpf_kern.h new file mode 100644 index 0000000000000000000000000000000000000000..31df4371016756c37454eafb3dfaf27c8e9994f1 --- /dev/null +++ b/systrace/src/os/__bpf_kern.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. 
All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.lu + * Create: 2021-09-28 + * Description: bpf header + ******************************************************************************/ +#ifndef __GOPHER_BPF_KERN_H__ +#define __GOPHER_BPF_KERN_H__ + +#ifdef BPF_PROG_KERN + +#include "vmlinux.h" +#include +#include +#include + +#define bpf_section(NAME) __attribute__((section(NAME), used)) + +#define KPROBE(func, type) \ + bpf_section("kprobe/" #func) \ + int bpf_##func(struct type *ctx) + +#define KRETPROBE(func, type) \ + bpf_section("kretprobe/" #func) \ + int bpf_ret_##func(struct type *ctx) + +#define KRAWTRACE(func, type) \ + bpf_section("raw_tracepoint/" #func) \ + int bpf_raw_trace_##func(struct type *ctx) + +#endif +#endif \ No newline at end of file diff --git a/systrace/src/os/__compat.h b/systrace/src/os/__compat.h new file mode 100644 index 0000000000000000000000000000000000000000..c69fc9ef66d698c0041d316c2acc94c6e502ee70 --- /dev/null +++ b/systrace/src/os/__compat.h @@ -0,0 +1,206 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * gala-gopher licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Yang Hanlin + * Create: 2023-09-19 + * Description: Compatibility APIs for eBPF probes + ******************************************************************************/ + +#ifndef __GOPHER_COMPAT_H__ +#define __GOPHER_COMPAT_H__ + +#if defined(BPF_PROG_KERN) || defined(BPF_PROG_USER) +#include + +#if defined(BPF_PROG_KERN) +#include "vmlinux.h" +#endif + +#include "__feat_probe.h" + +#define MAX_DATA_SIZE 10240 + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, MAX_DATA_SIZE); +} heap SEC(".maps"); + +#endif + +#if !defined(BPF_PROG_KERN) && !defined(BPF_PROG_USER) +#include +#include +#include + +#include "__feat_probe.h" + +#define PERF_BUFFER_PAGES 64 + +typedef int (*bpf_buffer_sample_fn)(void *ctx, void *data, u32 size); +typedef void (*bpf_buffer_lost_fn)(void *ctx, int cpu, u64 cnt); + +struct bpf_buffer +{ + struct bpf_map *map; + void *inner; + bpf_buffer_sample_fn fn; + void *ctx; + int type; +}; + +static void __perfbuf_sample_fn(void *ctx, int cpu, void *data, __u32 size) +{ + struct bpf_buffer *buffer = (struct bpf_buffer *)ctx; + bpf_buffer_sample_fn fn; + + fn = buffer->fn; + if (!fn) { + return; + } + + (void)fn(buffer->ctx, data, size); +} + +static inline int bpf_buffer__reset(struct bpf_map *map, struct bpf_map *heap) +{ + bool use_ringbuf; + int type; + + use_ringbuf = probe_ringbuf(); + if (use_ringbuf) { + bpf_map__set_autocreate(heap, false); + bpf_map__set_type(map, BPF_MAP_TYPE_RINGBUF); + type = BPF_MAP_TYPE_RINGBUF; + } else { + bpf_map__set_autocreate(heap, true); + bpf_map__set_type(map, 
BPF_MAP_TYPE_PERF_EVENT_ARRAY); + bpf_map__set_key_size(map, sizeof(int)); + bpf_map__set_value_size(map, sizeof(int)); + type = BPF_MAP_TYPE_PERF_EVENT_ARRAY; + } + + return type; +} + +#define MAX_RB_MAP_SZ 32 +static inline int bpf_buffer__set_max_entries(struct bpf_map *map, struct bpf_buffer *buffer, unsigned char map_size_mb) +{ + + if (buffer == NULL || buffer->type != BPF_MAP_TYPE_RINGBUF) { + return 0; + } + + if (map_size_mb == 0 || map_size_mb > MAX_RB_MAP_SZ) { + return -1; + } + + u32 max_entries = map_size_mb * 1024 * 1024; + return bpf_map__set_max_entries(map, max_entries); +} + +static inline struct bpf_buffer *bpf_buffer__new(struct bpf_map *map, struct bpf_map *heap) +{ + struct bpf_buffer *buffer; + int type; + + type = bpf_buffer__reset(map, heap); + buffer = (struct bpf_buffer *)calloc(1, sizeof(*buffer)); + if (!buffer) { + errno = ENOMEM; + return NULL; + } + + buffer->map = map; + buffer->type = type; + return buffer; +} + +static inline struct bpf_buffer *bpf_buffer__new_shared(struct bpf_map *map, struct bpf_map *heap, struct bpf_buffer **buffer_ptr) +{ + struct bpf_buffer *buffer; + + if (*buffer_ptr != NULL) { + (void)bpf_buffer__reset(map, heap); + return *buffer_ptr; + } + + buffer = bpf_buffer__new(map, heap); + *buffer_ptr = buffer; + return buffer; +} + +static inline int bpf_buffer__open(struct bpf_buffer *buffer, bpf_buffer_sample_fn sample_cb, bpf_buffer_lost_fn lost_cb, void *ctx) +{ + int fd, type; + void *inner; + + if (buffer == NULL) { + return -1; + } + + fd = bpf_map__fd(buffer->map); + type = buffer->type; + + switch (type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + buffer->fn = sample_cb; + buffer->ctx = ctx; + inner = perf_buffer__new(fd, PERF_BUFFER_PAGES, __perfbuf_sample_fn, lost_cb, buffer, NULL); + break; + case BPF_MAP_TYPE_RINGBUF: + inner = ring_buffer__new(fd, (ring_buffer_sample_fn) sample_cb, ctx, NULL); + break; + default: + return 0; + } + + long err = libbpf_get_error(inner); + if (err) { + return err; + } 
+ + buffer->inner = inner; + return 0; +} + +static inline int bpf_buffer__poll(struct bpf_buffer *buffer, int timeout_ms) +{ + switch (buffer->type) + { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + return perf_buffer__poll((struct perf_buffer *)buffer->inner, timeout_ms); + case BPF_MAP_TYPE_RINGBUF: + return ring_buffer__poll((struct ring_buffer *)buffer->inner, timeout_ms); + default: + return -EINVAL; + } +} + +static inline void bpf_buffer__free(struct bpf_buffer *buffer) +{ + if (!buffer) { + return; + } + + switch (buffer->type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + perf_buffer__free((struct perf_buffer *)buffer->inner); + break; + case BPF_MAP_TYPE_RINGBUF: + ring_buffer__free((struct ring_buffer *)buffer->inner); + break; + } + free(buffer); +} +#endif + +#endif diff --git a/systrace/src/os/__feat_probe.h b/systrace/src/os/__feat_probe.h new file mode 100644 index 0000000000000000000000000000000000000000..a04f2c67bb8b21193e5b901a12dc64374d29d18c --- /dev/null +++ b/systrace/src/os/__feat_probe.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Yang Hanlin + * Create: 2023-09-18 + * Description: Utility functions for feature probes + ******************************************************************************/ + +#ifndef __GOPHER_FEAT_PROBE_H__ +#define __GOPHER_FEAT_PROBE_H__ + +#if defined(BPF_PROG_KERN) || defined(BPF_PROG_USER) +#include +#include +#endif + +#ifdef BPF_PROG_KERN +#include "vmlinux.h" +#elif defined(BPF_PROG_USER) +struct bpf_ringbuf { +}; +#endif + +#if !defined(BPF_PROG_KERN) && !defined(BPF_PROG_USER) +#include +#include +#include +#include +#include + +#include "common.h" +#endif + +/* BPF_MAP_TYPE_RINGBUF original defined in /usr/include/linux/bpf.h, which from kernel-headers + if BPF_MAP_TYPE_RINGBUF wasn't defined, this kernel does not support using ringbuf */ +#ifndef BPF_MAP_TYPE_RINGBUF +#define BPF_MAP_TYPE_RINGBUF 27 // defined here to avoid compile error in lower kernel version +#define IS_RINGBUF_DEFINED 0 +#else +#define IS_RINGBUF_DEFINED 1 +#endif + +#if defined(BPF_PROG_KERN) || defined(BPF_PROG_USER) +static inline char probe_ringbuf() +{ +#if CLANG_VER_MAJOR >= 12 + return (char)bpf_core_type_exists(struct bpf_ringbuf); +#else + return IS_RINGBUF_DEFINED; +#endif +} +#endif +#if !defined(BPF_PROG_KERN) && !defined(BPF_PROG_USER) +static inline bool probe_ringbuf() { + int map_fd; + + if ((map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, getpagesize(), NULL)) < 0) { + return false; + } + + close(map_fd); + return true; +} +#endif + +#endif diff --git a/systrace/src/os/__libbpf.h b/systrace/src/os/__libbpf.h new file mode 100644 index 0000000000000000000000000000000000000000..4aa3eca4536f04e5d2c12d0c108caafeae9c0dbf --- /dev/null +++ b/systrace/src/os/__libbpf.h @@ -0,0 +1,334 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.lu + * Create: 2021-09-28 + * Description: bpf header + ******************************************************************************/ +#ifndef __GOPHER_LIB_BPF_H__ +#define __GOPHER_LIB_BPF_H__ + +#pragma once + +#if !defined( BPF_PROG_KERN ) && !defined( BPF_PROG_USER ) + +#include +#include +#include +#include +#include "common.h" +#include "__compat.h" + +#define EBPF_RLIM_LIMITED RLIM_INFINITY +#define EBPF_RLIM_INFINITY (~0UL) +#ifndef EINTR +#define EINTR 4 +#endif + +static __always_inline int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_WARN) + return vfprintf(stderr, format, args); + + return 0; +} + +static __always_inline int set_memlock_rlimit(unsigned long limit) +{ + struct rlimit rlim_new = { + .rlim_cur = limit, + .rlim_max = limit, + }; + + if (setrlimit(RLIMIT_MEMLOCK, (const struct rlimit *)&rlim_new) != 0) { + (void)fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n"); + return 0; + } + return 1; +} + +#define GET_MAP_OBJ(probe_name, map_name) (probe_name##_skel->maps.map_name) +#define GET_MAP_FD(probe_name, map_name) bpf_map__fd(probe_name##_skel->maps.map_name) +#define GET_PROG_FD(probe_name, prog_name) bpf_program__fd(probe_name##_skel->progs.prog_name) +#define GET_PROGRAM_OBJ(probe_name, prog_name) (probe_name##_skel->progs.prog_name) + +#define GET_MAP_FD_BY_SKEL(skel, probe_name, map_name) \ + bpf_map__fd(((struct probe_name##_bpf *)(skel))->maps.map_name) +#define GET_PROG_OBJ_BY_SKEL(skel, probe_name) \ + (((struct 
probe_name##_bpf *)(skel))->obj) + +#define BPF_OBJ_GET_MAP_FD(obj, map_name) \ + ({ \ + int __fd = -1; \ + struct bpf_map *__map = bpf_object__find_map_by_name((obj), (map_name)); \ + if (__map) { \ + __fd = bpf_map__fd(__map); \ + } \ + __fd; \ + }) + +#define BPF_OBJ_PIN_MAP_PATH(obj, map_name, path) \ + ({ \ + int __ret = -1; \ + struct bpf_map *__map = bpf_object__find_map_by_name((obj), (map_name)); \ + if (__map) { \ + __ret = bpf_map__set_pin_path(__map, path); \ + } \ + __ret; \ + }) + + +#define __MAP_SET_PIN_PATH(probe_name, map_name, map_path) \ + do { \ + int ret; \ + struct bpf_map *__map; \ + \ + __map = GET_MAP_OBJ(probe_name, map_name); \ + ret = bpf_map__set_pin_path(__map, map_path); \ + printf("======>SHARE map(" #map_name ") set pin path \"%s\"(ret=%d).\n", map_path, ret); \ + } while (0) + +#define GET_PROC_MAP_PIN_PATH(app_name) ("/sys/fs/bpf/sysTrace/__"#app_name"_proc_map") + +#define INIT_BPF_APP(app_name, limit) \ + static char __init = 0; \ + do { \ + if (!__init) { \ + /* Set up libbpf printfs and printf printf callback */ \ + (void)libbpf_set_print(libbpf_print_fn); \ + \ + /* Bump RLIMIT_MEMLOCK allow BPF sub-system to do anything */ \ + if (set_memlock_rlimit(limit) == 0) { \ + printf("BPF app(" #app_name ") failed to set mem limit.\n"); \ + return -1; \ + } \ + __init = 1; \ + } \ + } while (0) + +#define LOAD(app_name, probe_name, end) \ + struct probe_name##_bpf *probe_name##_skel = NULL; \ + struct bpf_link *probe_name##_link[PATH_NUM] __maybe_unused = {NULL}; \ + int probe_name##_link_current = 0; \ + do { \ + int err; \ + /* Open load and verify BPF application */ \ + probe_name##_skel = probe_name##_bpf__open(); \ + if (!probe_name##_skel) { \ + printf("Failed to open BPF " #probe_name " skeleton\n"); \ + goto end; \ + } \ + if (probe_name##_bpf__load(probe_name##_skel)) { \ + printf("Failed to load BPF " #probe_name " skeleton\n"); \ + goto end; \ + } \ + /* Attach tracepoint handler */ \ + err = 
probe_name##_bpf__attach(probe_name##_skel); \ + if (err) { \ + printf("Failed to attach BPF " #probe_name " skeleton\n"); \ + probe_name##_bpf__destroy(probe_name##_skel); \ + probe_name##_skel = NULL; \ + goto end; \ + } \ + printf("Succeed to load and attach BPF " #probe_name " skeleton\n"); \ + } while (0) + +#define __OPEN_OPTS(probe_name, end, load, opts) \ + struct probe_name##_bpf *probe_name##_skel = NULL; \ + struct bpf_link *probe_name##_link[PATH_NUM] __maybe_unused = {NULL}; \ + int probe_name##_link_current = 0; \ + do { \ + if (load) \ + {\ + /* Open load and verify BPF application */ \ + probe_name##_skel = probe_name##_bpf__open_opts(opts); \ + if (!probe_name##_skel) { \ + printf("Failed to open BPF " #probe_name " skeleton\n"); \ + goto end; \ + } \ + }\ + } while (0) + +#define OPEN(probe_name, end, load) __OPEN_OPTS(probe_name, end, load, NULL) + +#define OPEN_OPTS(probe_name, end, load) __OPEN_OPTS(probe_name, end, load, &probe_name##_open_opts) + +#define MAP_SET_PIN_PATH(probe_name, map_name, map_path, load) \ + do { \ + if (load) \ + { \ + __MAP_SET_PIN_PATH(probe_name, map_name, map_path); \ + } \ + } while (0) + +#define MAP_INIT_BPF_BUFFER(probe_name, map_name, buffer, load) \ + do { \ + if (load) { \ + buffer = bpf_buffer__new(probe_name##_skel->maps.map_name, probe_name##_skel->maps.heap); \ + if (buffer == NULL) { \ + printf("Failed to initialize bpf_buffer for " #map_name " in " #probe_name "\n"); \ + } \ + } \ + } while (0) + +#define MAP_INIT_BPF_BUFFER_SHARED(probe_name, map_name, buffer_ptr, load) \ + do { \ + if (load) { \ + (void)bpf_buffer__new_shared(probe_name##_skel->maps.map_name, probe_name##_skel->maps.heap, (buffer_ptr)); \ + if (*(buffer_ptr) == NULL) { \ + printf("Failed to initialize bpf_buffer for " #map_name " in " #probe_name "\n"); \ + } \ + } \ + } while (0) + +#define LOAD_ATTACH(app_name, probe_name, end, load) \ + do { \ + if (load) \ + { \ + int err; \ + if (probe_name##_bpf__load(probe_name##_skel)) { \ + 
printf("Failed to load BPF " #probe_name " skeleton\n"); \ + goto end; \ + } \ + /* Attach tracepoint handler */ \ + err = probe_name##_bpf__attach(probe_name##_skel); \ + if (err) { \ + printf("Failed to attach BPF " #probe_name " skeleton\n"); \ + probe_name##_bpf__destroy(probe_name##_skel); \ + probe_name##_skel = NULL; \ + goto end; \ + } \ + printf("Succeed to load and attach BPF " #probe_name " skeleton\n"); \ + } \ + } while (0) + +#define UNLOAD(probe_name) \ + do { \ + int err; \ + if (probe_name##_skel != NULL) { \ + probe_name##_bpf__destroy(probe_name##_skel); \ + } \ + for (int i = 0; i < probe_name##_link_current; i++) { \ + err = bpf_link__destroy(probe_name##_link[i]); \ + if (err < 0) { \ + printf("Failed to detach BPF " #probe_name " %d\n", err); \ + break; \ + } \ + } \ + } while (0) + +#define INIT_OPEN_OPTS(probe_name) \ + LIBBPF_OPTS(bpf_object_open_opts, probe_name##_open_opts) + +static __always_inline __maybe_unused void poll_pb(struct perf_buffer *pb, int timeout_ms) +{ + int ret; + + while ((ret = perf_buffer__poll(pb, timeout_ms)) >= 0 || ret == -EINTR) { + ; + } + return; +} + +#define SKEL_MAX_NUM 20 +typedef void (*skel_destroy_fn)(void *); + +struct __bpf_skel_s { + skel_destroy_fn fn; + void *skel; + void *_link[PATH_NUM]; + size_t _link_num; +}; +struct bpf_prog_s { + struct perf_buffer* pb; + struct ring_buffer* rb; + struct bpf_buffer *buffer; + struct perf_buffer* pbs[SKEL_MAX_NUM]; + struct ring_buffer* rbs[SKEL_MAX_NUM]; + struct bpf_buffer *buffers[SKEL_MAX_NUM]; + struct __bpf_skel_s skels[SKEL_MAX_NUM]; + const char *custom_btf_paths[SKEL_MAX_NUM]; + size_t num; +}; + +static __always_inline __maybe_unused void free_bpf_prog(struct bpf_prog_s *prog) +{ + (void)free(prog); +} + +static __always_inline __maybe_unused struct bpf_prog_s *alloc_bpf_prog(void) +{ + struct bpf_prog_s *prog = malloc(sizeof(struct bpf_prog_s)); + if (prog == NULL) { + return NULL; + } + + (void)memset(prog, 0, sizeof(struct bpf_prog_s)); + return 
prog; +} + +static __always_inline __maybe_unused void unload_bpf_prog(struct bpf_prog_s **unload_prog) +{ + struct bpf_prog_s *prog = *unload_prog; + + *unload_prog = NULL; + if (prog == NULL) { + return; + } + + for (int i = 0; i < prog->num; i++) { + if (prog->skels[i].skel) { + prog->skels[i].fn(prog->skels[i].skel); + + for (int j = 0; j < prog->skels[i]._link_num; j++) { + if (prog->skels[i]._link[j]) { + (void)bpf_link__destroy(prog->skels[i]._link[j]); + } + } + } + + if (prog->pbs[i]) { + perf_buffer__free(prog->pbs[i]); + } + +#if (CURRENT_LIBBPF_VERSION >= LIBBPF_VERSION(0, 8)) + if (prog->rbs[i]) { + ring_buffer__free(prog->rbs[i]); + } +#endif + + if (prog->buffers[i]) { + bpf_buffer__free(prog->buffers[i]); + } + + free((char *)prog->custom_btf_paths[i]); + } + + if (prog->pb) { + perf_buffer__free(prog->pb); + } + +#if (CURRENT_LIBBPF_VERSION >= LIBBPF_VERSION(0, 8)) + if (prog->rb) { + ring_buffer__free(prog->rb); + } +#endif + + if (prog->buffer) { + bpf_buffer__free(prog->buffer); + } + + free_bpf_prog(prog); + return; +} + + +#endif +#endif diff --git a/systrace/src/os/bpf.h b/systrace/src/os/bpf.h new file mode 100644 index 0000000000000000000000000000000000000000..48c0e7ed14fa1585cdcaa4b48e0ba9c5809efc43 --- /dev/null +++ b/systrace/src/os/bpf.h @@ -0,0 +1,30 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Mr.lu + * Create: 2021-09-28 + * Description: bpf header + ******************************************************************************/ +#ifndef __GOPHER_BPF_H__ +#define __GOPHER_BPF_H__ + +#pragma once + +#include "common.h" + +#define LIBBPF_VERSION(a, b) (((a) << 8) + (b)) +#define CURRENT_LIBBPF_VERSION LIBBPF_VERSION(LIBBPF_VER_MAJOR, LIBBPF_VER_MINOR) + +#include "__libbpf.h" +#include "__bpf_kern.h" +#include "__feat_probe.h" +#include "__compat.h" + +#endif diff --git a/systrace/src/os/bpf_comm.h b/systrace/src/os/bpf_comm.h new file mode 100644 index 0000000000000000000000000000000000000000..52870c876793fcc9a8216a746e5e3c8ae1dd9951 --- /dev/null +++ b/systrace/src/os/bpf_comm.h @@ -0,0 +1,211 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: wo_cow + * Create: 2025-04-30 + * Description: + ******************************************************************************/ + +#ifndef __BPF_COMMON_H__ +#define __BPF_COMMON_H__ +#include "bpf.h" + +#include "os_probe.h" + +#define MAX_SIZE_OF_PROC 128 +#define MAX_SIZE_OF_THREAD (128 * MAX_SIZE_OF_PROC) +#define PF_IDLE 0x00000002 /* IDLE thread */ +#define PF_KTHREAD 0x00200000 /* kernel thread */ + +typedef struct { + u32 pid; + u32 tgid; +} offcpu_task_key_s; + +typedef struct { + int pid; + int rank; + __u64 start_time; + __u64 end_time; + __u64 delay; +} task_cpu_s; + +typedef struct { + int key; + event_type_e event; +} comm_mem_task_key_s; + +typedef struct { + event_type_e event; + u32 pid; + u32 tgid; +} fault_task_key_s; + +typedef struct { + event_type_e event; + __u32 key; + __u64 start_ts; + int rank; +} task_mem_s; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_3 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_4 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_5 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_6 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64); +} osprobe_map_7 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, 16); + __uint(value_size, sizeof(int)); + __uint(max_entries, 128); +} kernel_filter_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, 
sizeof(int)); + __uint(max_entries, 128); +} proc_filter_map SEC(".maps"); + +static __always_inline void emit_event(trace_event_data_t *event, void *ctx) +{ + if (!event) { + return; + } + switch (event->rank) { + case 0: + bpf_ringbuf_output(&osprobe_map_0, event, sizeof(*event), 0); + break; + case 1: + bpf_ringbuf_output(&osprobe_map_1, event, sizeof(*event), 0); + break; + case 2: + bpf_ringbuf_output(&osprobe_map_2, event, sizeof(*event), 0); + break; + case 3: + bpf_ringbuf_output(&osprobe_map_3, event, sizeof(*event), 0); + break; + case 4: + bpf_ringbuf_output(&osprobe_map_4, event, sizeof(*event), 0); + break; + case 5: + bpf_ringbuf_output(&osprobe_map_5, event, sizeof(*event), 0); + break; + case 6: + bpf_ringbuf_output(&osprobe_map_6, event, sizeof(*event), 0); + break; + case 7: + bpf_ringbuf_output(&osprobe_map_7, event, sizeof(*event), 0); + break; + default: + break; + } +} + +static __always_inline void create_cur_event(trace_event_data_t *cur_event, int key, + u64 start_time, u64 end_time, int rank, event_type_e type) +{ + if (cur_event == NULL) { + return; + } + __builtin_memset(cur_event, 0, sizeof(*cur_event)); + cur_event->key = key; + cur_event->start_time = start_time; + cur_event->end_time = end_time; + cur_event->duration = end_time - start_time; + cur_event->type = type; + cur_event->rank = rank; +} + +static __always_inline char is_filter_task(struct task_struct *task) +{ + unsigned int flags = BPF_CORE_READ(task, flags); + return (char)((flags & PF_IDLE) || (flags & PF_KTHREAD)); +} + +static __always_inline int get_npu_id(struct task_struct *task) +{ + u32 pid = BPF_CORE_READ(task, pid); + + // 匹配python主线程 + int *rank; + rank = bpf_map_lookup_elem(&proc_filter_map, &pid); + if (rank) { + return *rank; + } + + // 匹配内核dev线程 + char comm[16] = {}; + bpf_core_read_str(comm, sizeof(comm), &task->comm); + // bpf_get_current_comm(&comm, sizeof(comm)); + rank = bpf_map_lookup_elem(&kernel_filter_map, comm); + if (rank) { + bpf_printk("is 
kernel thread:%s, pid is %lu", comm, pid); + return *rank; + } + + // 匹配ACL线程 + int match = 1; + const char target[] = "ACL_thread"; + for (int i = 0; i < sizeof(target); i++) { + if (comm[i] != target[i]) { + match = 0; + break; + } + } + if (match) { + u32 tgid = BPF_CORE_READ(task, tgid); + rank = bpf_map_lookup_elem(&proc_filter_map, &tgid); + if (rank) { + return *rank; + } + } + + // 全都不匹配返回-1 + return -1; + +} + +#endif diff --git a/systrace/src/os/common.h b/systrace/src/os/common.h new file mode 100644 index 0000000000000000000000000000000000000000..95a6dbd08a5b3ef41e2c2fb02da3ef712e4f5b3c --- /dev/null +++ b/systrace/src/os/common.h @@ -0,0 +1,70 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Mr.lu + * Create: 2022-5-30 + * Description: common macro define + ******************************************************************************/ +#ifndef __GOPHER_COMMON_H__ +#define __GOPHER_COMMON_H__ + +#pragma once + +#define THOUSAND 1000 +#define PATH_NUM 20 + +#define __maybe_unused __attribute__((unused)) + +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +#ifndef __u8 +typedef unsigned char __u8; +typedef __u8 u8; +#endif + +#ifndef __s8 +typedef signed char __s8; +typedef __s8 s8; +#endif + +#ifndef __s16 +typedef signed short __s16; +typedef __s16 s16; +#endif + +#ifndef __u16 +typedef short unsigned int __u16; +typedef __u16 u16; +typedef __u16 __be16; +#endif + +#ifndef __u32 +typedef unsigned int __u32; +typedef __u32 u32; +typedef __u32 __be32; +typedef __u32 __wsum; +#endif + +#ifndef __s64 +typedef long long int __s64; +typedef __s64 s64; +#endif + +#ifndef __u64 +typedef long long unsigned int __u64; +typedef __u64 u64; +#endif +#endif diff --git a/systrace/src/os/os_cpu.bpf.c b/systrace/src/os/os_cpu.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..8cf874113b69ac9e5bcac2751091487f0c064d3f --- /dev/null +++ b/systrace/src/os/os_cpu.bpf.c @@ -0,0 +1,129 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
 * You may obtain a copy of Mulan PSL v2 at:
 *     http://license.coscl.org.cn/MulanPSL2
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 * PURPOSE.
 * See the Mulan PSL v2 for more details.
 * Author: curry
 * Create: 2025-04-30
 * Description:
 ******************************************************************************/
#ifdef BPF_PROG_KERN
#undef BPF_PROG_KERN
#endif
#define BPF_PROG_KERN
#include "vmlinux.h"
#include "bpf.h"
#include "bpf_comm.h"
#include "os_probe.h"

char g_license[] SEC("license") = "GPL";

/* Per-thread off-CPU bookkeeping, keyed by (pid, tgid). A value is created
 * when the thread is switched OUT and consumed when it is switched back IN. */
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(key_size, sizeof(offcpu_task_key_s));
    __uint(value_size, sizeof(task_cpu_s));
    __uint(max_entries, MAX_SIZE_OF_THREAD);
} task_cpu_map SEC(".maps");

/* Look up (or lazily create) the off-CPU tracking slot for `task`.
 * Returns NULL for tasks that do not map to an NPU rank (get_npu_id < 0),
 * so untracked threads never allocate map entries. */
static __always_inline task_cpu_s *get_offcpu_enter(struct task_struct *task)
{
    int rank = 0;
    u32 pid = BPF_CORE_READ(task, pid);   // kernel "pid" = thread id
    u32 tgid = BPF_CORE_READ(task, tgid); // kernel "tgid" = process id
    offcpu_task_key_s task_offcpu_key = {0};
    task_offcpu_key.pid = pid;
    task_offcpu_key.tgid = tgid;
    task_cpu_s *offcpu_enter;
    offcpu_enter = (task_cpu_s *)bpf_map_lookup_elem(&task_cpu_map, &task_offcpu_key);
    if (offcpu_enter == (void *)0) {
        task_cpu_s oncpu_enter_tmp;
        __builtin_memset(&oncpu_enter_tmp, 0, sizeof(oncpu_enter_tmp));
        rank = get_npu_id(task);
        if (rank < 0) {
            return 0; // not a tracked NPU thread
        }
        oncpu_enter_tmp.pid = pid;
        oncpu_enter_tmp.rank = rank;
        (void)bpf_map_update_elem(&task_cpu_map, &task_offcpu_key, &oncpu_enter_tmp, BPF_ANY);
        // Re-lookup so we return a pointer into the map, not to the stack copy.
        offcpu_enter = (task_cpu_s *)bpf_map_lookup_elem(&task_cpu_map, &task_offcpu_key);
    }

    return offcpu_enter;
}

/* memcmp-style compare over at most `len` bytes: returns 0 when the strings
 * are equal (or s1 ends while still matching), 1 at the first mismatch.
 * NOTE: 0 means "equal" — callers use `str_eq(a,b) && str_eq(a,c)` to mean
 * "a matches NEITHER b nor c". */
static __inline int str_eq(const char *s1, const char *s2, int len) {
    for (int i = 0; i < len; i++) {
        if (s1[i] != s2[i])
            return 1;
        if (s1[i] == '\0')
            return 0;
    }
    return 0;
}

/* Called when `task` is scheduled back ON a CPU: closes the off-CPU interval
 * recorded by process_offcpu() and emits an EVENT_TYPE_OFFCPU event whose
 * delay is the run-queue delay accumulated during that interval. */
static __always_inline void
process_oncpu(struct task_struct *task, void *ctx)
{
    u64 new_delay;
    u32 pid = BPF_CORE_READ(task, pid);   // kernel "pid" = thread id
    u32 tgid = BPF_CORE_READ(task, tgid); // kernel "tgid" = process id
    offcpu_task_key_s task_offcpu_key = {0};
    task_offcpu_key.pid = pid;
    task_offcpu_key.tgid = tgid;
    task_cpu_s *offcpu_enter = (task_cpu_s *)bpf_map_lookup_elem(&task_cpu_map, &task_offcpu_key);
    if (offcpu_enter == (void *)0) {
        return;
    }

    offcpu_enter->end_time = bpf_ktime_get_ns(); // end of the off-CPU interval
    if (offcpu_enter->start_time == 0) {
        // Never saw the matching switch-out; drop the half-open record.
        bpf_map_delete_elem(&task_cpu_map, &task_offcpu_key);
        return;
    }
    trace_event_data_t cur_event;
    create_cur_event(&cur_event, pid, offcpu_enter->start_time, offcpu_enter->end_time, offcpu_enter->rank, EVENT_TYPE_OFFCPU);
    // run_delay is cumulative; report only the delta over this interval.
    new_delay = BPF_CORE_READ((task), sched_info.run_delay);
    cur_event.delay = new_delay - offcpu_enter->delay;
    // bpf_get_current_comm(&cur_event.comm, sizeof(cur_event.comm));
    char target1[16] = "python";
    char target2[16] = "ACL_thread";
    bpf_core_read_str(cur_event.comm, sizeof(cur_event.comm), &task->comm);
    // Debug: log comms that match neither expected thread name (str_eq
    // returns 0 on match, so "&&" means "matches neither").
    if (str_eq(cur_event.comm, target1, sizeof(cur_event.comm)) &&
        str_eq(cur_event.comm, target2, sizeof(cur_event.comm))) {
        bpf_printk("emit event comm name is %s", cur_event.comm);
    }

    // bpf_printk("emit event pid is %lu tgid is %lu", pid, BPF_CORE_READ(task, tgid));
    emit_event(&cur_event, ctx);
    bpf_map_delete_elem(&task_cpu_map, &task_offcpu_key);
}

/* Called when `task` is switched OFF a CPU: stamps the interval start and
 * the cumulative run-queue delay so process_oncpu() can compute deltas. */
static __always_inline void process_offcpu(struct task_struct *task, void *ctx)
{
    task_cpu_s *offcpu_enter;
    offcpu_enter = get_offcpu_enter(task);
    if (offcpu_enter == (void *)0) {
        return;
    }
    offcpu_enter->start_time = bpf_ktime_get_ns();
    offcpu_enter->delay = BPF_CORE_READ((task), sched_info.run_delay);
    bpf_printk("delay is %llu", offcpu_enter->delay);
}

/* Raw tracepoint on sched_switch: args[1] = prev task, args[2] = next task. */
KRAWTRACE(sched_switch, bpf_raw_tracepoint_args)
{
    struct task_struct *prev = (struct task_struct *)ctx->args[1];
    struct task_struct *current = (struct task_struct
*)ctx->args[2]; + + if (is_filter_task(current)) { + return 0; + } + process_offcpu(prev, (void *)ctx); + process_oncpu(current, (void *)ctx); + + return 0; +} diff --git a/systrace/src/os/os_mem.bpf.c b/systrace/src/os/os_mem.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..f9b9b964c6aa78038da2cd9eb8e98e65e038e001 --- /dev/null +++ b/systrace/src/os/os_mem.bpf.c @@ -0,0 +1,218 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: curry + * Create: 2025-04-30 + * Description: + ******************************************************************************/ +#ifdef BPF_PROG_USER +#undef BPF_PROG_USER +#endif +#define BPF_PROG_KERN +#include "bpf.h" +#include "bpf_comm.h" +#include "os_probe.h" + +char g_license[] SEC("license") = "GPL"; + +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_ALL_CPU BPF_F_INDEX_MASK + +#ifndef __PERF_OUT_MAX +#define __PERF_OUT_MAX (64) +#endif + +#define PAGE_SIZE 4096 +#define DEFAULT_RANK 0 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(fault_task_key_s *)); + __uint(value_size, sizeof(task_mem_s)); + __uint(max_entries, 1000); +} fault_task_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(comm_mem_task_key_s *)); + __uint(value_size, sizeof(task_mem_s)); + __uint(max_entries, 1000); +} comm_mem_task_map SEC(".maps"); + +static __always_inline int fault_event_start(struct task_struct *task, event_type_e event) +{ + u32 pid = BPF_CORE_READ(task, pid); // 获取 TGID + u32 tgid = BPF_CORE_READ(task, tgid); // 获取 PID + int rank = 0; + rank = get_npu_id(task); + if (rank < 0) { + return 0; + } + + pid = BPF_CORE_READ(task, pid); + fault_task_key_s fault_task_key = {0}; + fault_task_key.event = event; + fault_task_key.pid = pid; + fault_task_key.tgid = tgid; + + task_mem_s task_mem_event = {0}; + task_mem_event.start_ts = bpf_ktime_get_ns(); + task_mem_event.event = event; + task_mem_event.key = pid; + task_mem_event.rank = rank; + bpf_map_update_elem(&fault_task_map, &fault_task_key, &task_mem_event, BPF_ANY); + + return 0; +} + +static __always_inline int fault_event_end(struct task_struct *task, void *ctx, event_type_e event) +{ + u32 pid = BPF_CORE_READ(task, pid); // 获取 TGID + u32 tgid = BPF_CORE_READ(task, tgid); // 获取 PID + fault_task_key_s fault_task_key = {0}; + fault_task_key.event = event; + fault_task_key.pid = pid; + fault_task_key.tgid = tgid; + + task_mem_s* 
task_mem_event = bpf_map_lookup_elem(&fault_task_map, &fault_task_key); + if (task_mem_event) { + u64 now = bpf_ktime_get_ns(); + if (now > task_mem_event->start_ts) { + trace_event_data_t cur_event; + create_cur_event(&cur_event, task_mem_event->key, task_mem_event->start_ts, now, task_mem_event->rank, event); + // bpf_get_current_comm(&cur_event.comm, sizeof(cur_event.comm)); + bpf_core_read_str(cur_event.comm, sizeof(cur_event.comm), &task->comm); + emit_event(&cur_event, ctx); + } + bpf_map_delete_elem(&fault_task_map, &fault_task_key); + } + + return 0; +} + +static __always_inline int common_event_start(struct task_struct *task, event_type_e event) +{ + int cpu = bpf_get_smp_processor_id(); + comm_mem_task_key_s comm_mem_task_key = {0}; + comm_mem_task_key.event = event; + comm_mem_task_key.key = cpu; + + task_mem_s task_mem_event = {0}; + task_mem_event.start_ts = bpf_ktime_get_ns(); + task_mem_event.event = event; + task_mem_event.key = cpu; + + bpf_map_update_elem(&comm_mem_task_map, &comm_mem_task_key, &task_mem_event, BPF_ANY); + + return 0; +} + +static __always_inline int common_event_end(struct task_struct *task, void *ctx, event_type_e event) +{ + int cpu = bpf_get_smp_processor_id(); + comm_mem_task_key_s comm_mem_task_key = {0}; + comm_mem_task_key.event = event; + comm_mem_task_key.key = cpu; + + task_mem_s* task_mem_event = bpf_map_lookup_elem(&comm_mem_task_map, & comm_mem_task_key); + if (task_mem_event) { + u64 now = bpf_ktime_get_ns(); + if (now > task_mem_event->start_ts) { + trace_event_data_t cur_event; + create_cur_event(&cur_event, task_mem_event->key, task_mem_event->start_ts, now, DEFAULT_RANK, event); + emit_event(&cur_event, ctx); + } + bpf_map_delete_elem(&fault_task_map, &comm_mem_task_key); + } + + return 0; +} + +KPROBE(handle_mm_fault, pt_regs) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + fault_event_start(task, EVENT_TYPE_MM_FAULT); + + return 0; 
+} + +KRETPROBE(handle_mm_fault, pt_regs) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + fault_event_end(task, ctx, EVENT_TYPE_MM_FAULT); + + return 0; +} + +KPROBE(do_swap_page, pt_regs) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + common_event_start(task, EVENT_TYPE_SWAP_PAGE); + return 0; +} + +KRETPROBE(do_swap_page, pt_regs) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + common_event_end(task, ctx, EVENT_TYPE_SWAP_PAGE); + return 0; +} + +KRAWTRACE(mm_compaction_begin, bpf_raw_tracepoint_args) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + common_event_start(task, EVENT_TYPE_COMPACTION); + return 0; +} + +KRAWTRACE(mm_compaction_end, bpf_raw_tracepoint_args) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + common_event_end(task, ctx, EVENT_TYPE_COMPACTION); + return 0; +} + +KRAWTRACE(mm_vmscan_direct_reclaim_begin, bpf_raw_tracepoint_args) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + common_event_start(task, EVENT_TYPE_VMSCAN); + return 0; +} + +KRAWTRACE(mm_vmscan_direct_reclaim_end, bpf_raw_tracepoint_args) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + if (is_filter_task(task)) { + return 0; + } + common_event_end(task, ctx, EVENT_TYPE_VMSCAN); + return 0; +} diff --git a/systrace/src/os/os_probe.c b/systrace/src/os/os_probe.c new file mode 100644 index 0000000000000000000000000000000000000000..50c9a706d68f84f0bde0a85eca2a33a799d7a75c --- /dev/null +++ b/systrace/src/os/os_probe.c @@ -0,0 +1,581 @@ + 
+/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: wo_cow + * Create: 2025-04-30 + * Description: + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef BPF_PROG_KERN +#undef BPF_PROG_KERN +#endif + +#ifdef BPF_PROG_USER +#undef BPF_PROG_USER +#endif + +#include "../../include/common/shared_constants.h" +#include "../../protos/systrace.pb-c.h" +#include "bpf.h" +#include "os_probe.h" +#include "os_mem.skel.h" +#include "os_cpu.skel.h" + +#define MAX_PATH_LEN 512 +#define LOG_INTERVAL_SEC 120 +#define RM_MAP_PATH "/usr/bin/rm -rf /sys/fs/bpf/sysTrace*" +#define PROC_FILTER_MAP_PATH "/sys/fs/bpf/sysTrace/__osprobe_proc_filter" +#define KERNEL_FILTER_MAP_PATH "/sys/fs/bpf/sysTrace/__osprobe_kernel_filter" +#define LOG_ITEMS_MIN 10 + +#define MAP_SET_COMMON_PIN_PATHS(probe_name, end, load) \ + INIT_OPEN_OPTS(probe_name); \ + OPEN_OPTS(probe_name, end, load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_0, "/sys/fs/bpf/sysTrace/__osprobe_map_0" , load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_1, "/sys/fs/bpf/sysTrace/__osprobe_map_1" , load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_2, "/sys/fs/bpf/sysTrace/__osprobe_map_2" , load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_3, "/sys/fs/bpf/sysTrace/__osprobe_map_3" , load); \ + 
MAP_SET_PIN_PATH(probe_name, osprobe_map_4, "/sys/fs/bpf/sysTrace/__osprobe_map_4" , load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_5, "/sys/fs/bpf/sysTrace/__osprobe_map_5" , load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_6, "/sys/fs/bpf/sysTrace/__osprobe_map_6" , load); \ + MAP_SET_PIN_PATH(probe_name, osprobe_map_7, "/sys/fs/bpf/sysTrace/__osprobe_map_7" , load); \ + MAP_SET_PIN_PATH(probe_name, proc_filter_map, PROC_FILTER_MAP_PATH, load); \ + MAP_SET_PIN_PATH(probe_name, kernel_filter_map, KERNEL_FILTER_MAP_PATH, load); \ + +#define OPEN_OSPROBE(probe_name, end, load, buffer) \ + MAP_SET_COMMON_PIN_PATHS(probe_name, end, load); \ + MAP_INIT_BPF_BUFFER_SHARED(probe_name, osprobe_map_0, &buffer, load); \ + +#define MAP_SET_PIN_SINGLE(probe_name, osprobe_map, osprobe_map_path, end, load, buffer) \ + MAP_SET_PIN_PATH(probe_name, osprobe_map, osprobe_map_path , load); \ + MAP_SET_PIN_PATH(probe_name, proc_filter_map, PROC_FILTER_MAP_PATH, load); \ + MAP_INIT_BPF_BUFFER_SHARED(probe_name, osprobe_map, &buffer, load); \ + +static pthread_mutex_t file_mutex = PTHREAD_MUTEX_INITIALIZER; +int g_stop = 0; + +static pthread_key_t thread_data_key; +static pthread_once_t key_once = PTHREAD_ONCE_INIT; +static int rank; +static int local_rank; +static u64 sysBootTime; + +typedef struct +{ + OSprobe *osprobe; + time_t last_log_time; +} OSprobe_ThreadData; + +void sig_int() +{ + g_stop = 1; +}; + +char *event_name[] = { + "mem_fault", + "swap_page", + "compaction", + "vmscan", + "offcpu" +}; + +// system boot time = current time - uptime since system boot. 
+static int get_sys_boot_time() +{ + struct timespec ts_cur_time = {0}; + struct timespec ts_uptime = {0}; + __u64 cur_time = 0; + __u64 uptime = 0; + + if (clock_gettime(CLOCK_REALTIME, &ts_cur_time)) { + return -1; + } + cur_time = (__u64)ts_cur_time.tv_sec * NSEC_PER_SEC + ts_cur_time.tv_nsec; + + if (clock_gettime(CLOCK_BOOTTIME, &ts_uptime)) { + return -1; + } + uptime = (__u64)ts_uptime.tv_sec * NSEC_PER_SEC + ts_uptime.tv_nsec; + + if (uptime >= cur_time) { + return -1; + } + sysBootTime = cur_time - uptime; + return 0; +} + +static __u64 get_unix_time_from_uptime(__u64 uptime) +{ + return sysBootTime + uptime; +} + +void initialize_osprobe() { + const char *rank_str = getenv("RANK"); + const char *local_rank_str = getenv("LOCAL_RANK"); + rank = rank_str ? atoi(rank_str) : 0; + local_rank = local_rank_str? atoi(local_rank_str) : 0; + get_sys_boot_time(); +} + +static void free_osprobe(OSprobe *osprobe) +{ + if (!osprobe) + return; + + // 释放分配记录 + for (size_t i = 0; i < osprobe->n_osprobe_entries; i++) + { + OSprobeEntry *entry = osprobe->osprobe_entries[i]; + free(entry); + } + free(osprobe->osprobe_entries); + osprobe->n_osprobe_entries = 0; + osprobe->osprobe_entries = NULL; +} + +static void free_thread_data(void *data) +{ + OSprobe_ThreadData *td = (OSprobe_ThreadData *)data; + if (td && td->osprobe) + { + free_osprobe(td->osprobe); + free(td->osprobe); + } + free(td); +} + +static void make_key() +{ + pthread_key_create(&thread_data_key, free_thread_data); +} + +static OSprobe_ThreadData *get_thread_data() +{ + OSprobe_ThreadData *td; + + pthread_once(&key_once, make_key); + td = pthread_getspecific(thread_data_key); + + if (!td) + { + td = calloc(1, sizeof(OSprobe_ThreadData)); + td->osprobe = calloc(1, sizeof(OSprobe)); + osprobe__init(td->osprobe); + td->last_log_time = time(NULL); + pthread_setspecific(thread_data_key, td); + } + + return td; +} + +static void add_osprobe_entry(trace_event_data_t *evt_data) +{ + OSprobe_ThreadData *td = 
get_thread_data(); + + OSprobeEntry *entry = malloc(sizeof(OSprobeEntry)); + osprobe_entry__init(entry); + entry->key = evt_data->key; + entry->start_us = get_unix_time_from_uptime(evt_data->start_time) / NSEC_PER_USEC; + entry->dur = evt_data->duration / NSEC_PER_USEC; + entry->rundelay = evt_data->delay; + entry->os_event_type = (u32)evt_data->type; + entry->rank = rank; + if (evt_data->comm[0] != '\0') { + if (strcasecmp(evt_data->comm, "python") != 0 && strcasecmp(evt_data->comm, "ACL_thread") != 0) { + fprintf(stderr, "[OS_PROBE RANK_%d] emit common name is: %s.\n", rank, evt_data->comm); + } + entry->comm = strdup(evt_data->comm); + } else { + char comm[16] = "unknown"; + entry->comm = strdup(comm); + // fprintf(stderr, "[OS_PROBE RANK_%d] emit common name NULl evt type is: %d.\n", rank, evt_data->type); + } + + td->osprobe->n_osprobe_entries++; + td->osprobe->osprobe_entries = + realloc(td->osprobe->osprobe_entries, + td->osprobe->n_osprobe_entries * sizeof(OSprobeEntry *)); + + td->osprobe->osprobe_entries[td->osprobe->n_osprobe_entries - 1] = + entry; +} + +static void get_log_filename(time_t current, char *buf, + size_t buf_size) +{ + struct tm *tm = localtime(¤t); + + const char *dir_path = SYS_TRACE_ROOT_DIR "osprobe"; + if (access(dir_path, F_OK) != 0) + { + if (mkdir(dir_path, 0755) != 0 && errno != EEXIST) + { + perror("Failed to create directory"); + snprintf(buf, buf_size, "os_trace_%04d%02d%02d_%02d_rank_%d.pb", + tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, rank); + return; + } + } + snprintf(buf, buf_size, "%s/os_trace_%04d%02d%02d_%02d_rank_%d.pb", + dir_path, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, rank); +} + +static char is_ready_to_write(OSprobe_ThreadData *td, time_t *current) +{ + OSprobe *osprobe = td->osprobe; + if (!osprobe || + (osprobe->n_osprobe_entries == 0)) + { + return 0; + } + + *current = time(NULL); + if (osprobe->n_osprobe_entries < LOG_ITEMS_MIN) + { + if (*current - 
td->last_log_time < LOG_INTERVAL_SEC) + { + return 0; + } + } + + return 1; +} + +static void write_protobuf_to_file() +{ + time_t current; + uint8_t *buf; + OSprobe_ThreadData *td = get_thread_data(); + if (!td) + { + return; + } + + if (!is_ready_to_write(td, ¤t)) + { + return; + } + if (pthread_mutex_trylock(&file_mutex) == 0) + { // pthread_mutex_trylock or pthread_mutex_lock + char filename[256]; + get_log_filename(current, filename, + sizeof(filename)); + size_t len = osprobe__get_packed_size(td->osprobe); + buf = malloc(len); + osprobe__pack(td->osprobe, buf); + + FILE *fp = fopen(filename, "ab"); + if (fp) + { + fwrite(buf, len, 1, fp); + fclose(fp); + } + + pthread_mutex_unlock(&file_mutex); + } + else + { + return; + } + + if (buf) + { + free(buf); + } + + free_osprobe(td->osprobe); + td->last_log_time = current; +} + +static int recv_bpf_msg(void *ctx, void *data, __u32 size) +{ + char *p = data; + size_t remain_size = (size_t)size, step_size = sizeof(trace_event_data_t), offset = 0; + trace_event_data_t *evt_data; + + do { + if (remain_size < step_size) { + break; + } + p = (char *)data + offset; + evt_data = (trace_event_data_t *)p; + add_osprobe_entry(evt_data); + write_protobuf_to_file(); + offset += step_size; + remain_size -= step_size; + } while (1); + + return 0; +} + +static int load_mem_probe(struct bpf_prog_s *prog, struct bpf_buffer *buffer) +{ + INIT_BPF_APP(os_probe, EBPF_RLIM_LIMITED); + OPEN_OSPROBE(os_mem, err, 1, buffer); + prog->skels[prog->num].skel = os_mem_skel; + prog->skels[prog->num].fn = (skel_destroy_fn)os_mem_bpf__destroy; + prog->custom_btf_paths[prog->num] = os_mem_open_opts.btf_custom_path; + LOAD_ATTACH(os_probe, os_mem, err, 1); + + int ret = bpf_buffer__open(buffer, recv_bpf_msg, NULL, NULL); + if (ret) { + fprintf(stderr, "[OS_PROBE RANK_%d] Open osprobe bpf_buffer failed: %s.\n", rank, strerror(errno)); + bpf_buffer__free(buffer); + goto err; + } + prog->buffers[prog->num] = buffer; + prog->num++; + + return 0; +err: + 
UNLOAD(os_mem); + return -1; +} + +static int load_cpu_probe(struct bpf_prog_s *prog, struct bpf_buffer *buffer) +{ + INIT_BPF_APP(os_probe, EBPF_RLIM_LIMITED); + OPEN_OSPROBE(os_cpu, err, 1, buffer); + prog->skels[prog->num].skel = os_cpu_skel; + prog->skels[prog->num].fn = (skel_destroy_fn)os_cpu_bpf__destroy; + prog->custom_btf_paths[prog->num] = os_cpu_open_opts.btf_custom_path; + LOAD_ATTACH(os_probe, os_cpu, err, 1); + + int ret = bpf_buffer__open(buffer, recv_bpf_msg, NULL, NULL); + if (ret) { + fprintf(stderr, "[OS_PROBE RANK_%d] Open osprobe bpf_buffer failed: %s.\n", rank, strerror(errno)); + bpf_buffer__free(buffer); + goto err; + } + prog->buffers[prog->num] = buffer; + prog->num++; + + return 0; +err: + UNLOAD(os_cpu); + return -1; +} + +int update_filter_map(){ + int proc_filter_map_fd, kernel_filter_map_fd; + int ret = 0; + proc_filter_map_fd = bpf_obj_get(PROC_FILTER_MAP_PATH); + if (proc_filter_map_fd < 0) { + // 打印error num + fprintf(stderr, "[OS_PROBE RANK_%d] Failed to get bpf prog proc_filter map: %s.\n", rank, strerror(errno)); + return -1; + } + kernel_filter_map_fd = bpf_obj_get(KERNEL_FILTER_MAP_PATH); + if (kernel_filter_map_fd < 0) { + // 打印error num + fprintf(stderr, "[OS_PROBE RANK_%d] Failed to get bpf prog kernel_filter map: %s.\n", rank, strerror(errno)); + return -1; + } + for (int dev_id = 0; dev_id < 8; ++dev_id) { + char send_key[16] = {0}; + char task_key[16] = {0}; + snprintf(send_key, sizeof(send_key), "dev%d_sq_send_wq", dev_id); + snprintf(task_key, sizeof(task_key), "dev%d_sq_task", dev_id); + + ret = bpf_map_update_elem(kernel_filter_map_fd, send_key, &dev_id, BPF_ANY); + if (ret != 0) { + perror("bpf_map_update_elem failed"); + } + ret = bpf_map_update_elem(kernel_filter_map_fd, task_key, &dev_id, BPF_ANY); + if (ret != 0) { + perror("bpf_map_update_elem failed"); + } + } + + FILE *fp; + char line[1024]; + + // 获取进程号 + sleep(2); + fp = popen("npu-smi info", "r"); + if (fp == NULL) { + perror("Failed to run npu-smi info"); 
+ return -1; + } + int start_parsing = 0; + while (fgets(line, sizeof(line), fp) != NULL) { + // 查找 Process id 和 NPU 号 + if (strstr(line, "Process id") != NULL) { + start_parsing = 1; + continue; + } + if (!start_parsing) continue; + // 空行表示表格结束 + if (strstr(line, "====") || strlen(line) < 10) continue; + + unsigned int npu, pid; + + // 匹配含 pid 的行,例如: + // | 0 0 | 1228424 | python | 194 | + if (sscanf(line, "| %u %*d | %u | %*s %*s | %*d", &npu, &pid) == 2) { + ret = bpf_map_update_elem(proc_filter_map_fd, &pid, &npu, BPF_ANY); + if (ret != 0) { + fprintf(stderr, "[OS_PROBE RANK_%d] bpf_map_update_elem failed: %s (errno: %d)\n", rank, + strerror(errno), errno); + } + } + } + pclose(fp); + + return 0; +} + +int bpf_buffer_init_from_pin(struct bpf_buffer **buffer_ptr, const char *map_path, + bpf_buffer_sample_fn fn, void *ctx) +{ + struct bpf_buffer *buffer; + if (!map_path || !fn) { + fprintf(stderr, "Invalid arguments to bpf_buffer_init_from_pin\n"); + return -EINVAL; + } + buffer = (struct bpf_buffer *)calloc(1, sizeof(*buffer)); + + int map_fd = bpf_obj_get(map_path); + if (map_fd < 0) { + fprintf(stderr, "Failed to open pinned map at %s: %s\n", map_path, strerror(errno)); + return -1; + } + + struct bpf_map_info info = {}; + __u32 info_len = sizeof(info); + if (bpf_obj_get_info_by_fd(map_fd, &info, &info_len) < 0) { + perror("bpf_obj_get_info_by_fd"); + close(map_fd); + return -1; + } + + buffer->type = info.type; + buffer->fn = fn; + buffer->ctx = ctx; + switch (info.type) { + case BPF_MAP_TYPE_RINGBUF: + buffer->inner = ring_buffer__new(map_fd, (ring_buffer_sample_fn) fn, ctx, NULL); + if (!buffer->inner) { + fprintf(stderr, "ring_buffer__new failed for map: %s\n", map_path); + close(map_fd); + return -1; + } + break; + + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + return -1; + + default: + fprintf(stderr, "Unsupported map type (%d) for map: %s\n", info.type, map_path); + close(map_fd); + return -1; + } + *buffer_ptr = buffer; + close(map_fd); // 不再需要,buffer 内部已经引用 
fd 或 dup + return 0; +} + +int run_osprobe() { + FILE *fp; + int ret = 0; + struct bpf_prog_s *prog = NULL; + struct bpf_buffer *buffer = NULL; + initialize_osprobe(); + + if (local_rank == 0) { + + fp = popen(RM_MAP_PATH, "r"); + if (fp != NULL) { + (void)pclose(fp); + fp = NULL; + } + prog = alloc_bpf_prog(); + if (prog == NULL) { + goto err; + } + + ret = load_mem_probe(prog, buffer); + if (ret) { + fprintf(stderr, "[OS_PROBE RANK_%d] load mem probe failed.\n", rank); + goto err; + } + ret = load_cpu_probe(prog, buffer); + if (ret) { + fprintf(stderr, "[OS_PROBE RANK_%d] load cpu probe failed.\n", rank); + goto err; + } + if (update_filter_map()) { + fprintf(stderr, "[OS_PROBE RANK_%d] Failed to update proc_filter map.\n", rank); + goto err; + } + while (!g_stop) { + sleep(1); + for (int i = 0; i < prog->num; i++) { + if (prog->buffers[i] + && ((ret = bpf_buffer__poll(prog->buffers[i], THOUSAND)) < 0) + && ret != -EINTR) { + fprintf(stderr, "[OS_PROBE] perf poll prog_%d failed.\n", i); + break; + } + } + } + + return ret; + + } + else + { + char osprobe_map_path[MAX_PATH_LEN]; + snprintf(osprobe_map_path, sizeof(osprobe_map_path), + "/sys/fs/bpf/sysTrace/__osprobe_map_%d", local_rank); + while (access(osprobe_map_path, F_OK) != 0) { + continue; + } + ret = bpf_buffer_init_from_pin(&buffer, + osprobe_map_path, + recv_bpf_msg, NULL); + if (ret < 0) { + fprintf(stderr, "[OS_PROBE RANK_%d] Failed to init buffer\n", local_rank); + goto err; + } + while (!g_stop) { + if (((ret = bpf_buffer__poll(buffer, THOUSAND)) < 0) + && ret != -EINTR) { + fprintf(stderr, "[OS_PROBE RANK_%d] perf poll prog failed:%s.\n", local_rank, strerror(errno)); + break; + } + } + } + + fprintf(stderr, "[OS_PROBE RANK_%d] sysTrace ebpf trace finished\n", local_rank); +err: + fp = popen(RM_MAP_PATH, "r"); + if (fp != NULL) { + (void)pclose(fp); + fp = NULL; + } + unload_bpf_prog(&prog); + if (prog) { + free_bpf_prog(prog); + } + return ret; +} diff --git a/systrace/src/os/os_probe.h 
b/systrace/src/os/os_probe.h new file mode 100644 index 0000000000000000000000000000000000000000..b8da2e43821d3c501efe65d001057d928a8cba68 --- /dev/null +++ b/systrace/src/os/os_probe.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. + * sysTrace licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: luzhihao + * Create: 2024-04-17 + * Description: sli probe + ******************************************************************************/ +#ifndef __OS_PROBE_H__ +#define __OS_PROBE_H__ + +#pragma once + +#define THREAD_COMM_LEN 16 + +typedef enum { + EVENT_TYPE_MM_FAULT = 0, + EVENT_TYPE_SWAP_PAGE, + EVENT_TYPE_COMPACTION, + EVENT_TYPE_VMSCAN, + EVENT_TYPE_OFFCPU, + EVENT_TYPE_MAX +} event_type_e; + +typedef struct { + int key; + int rank; + long long unsigned int start_time; + long long unsigned int end_time; + long long unsigned int duration; // 若为多个事件聚合,则表示累计的执行时间 + event_type_e type; + long long unsigned int delay; + char comm[THREAD_COMM_LEN]; +} trace_event_data_t; + +#endif