From eb4ed60be9cf99bab7f808880e53dbf6fbcc8717 Mon Sep 17 00:00:00 2001 From: znzjugod Date: Mon, 30 Sep 2024 01:54:30 +0800 Subject: [PATCH 1/3] update ebpf collector --- add-ebpf-collector.patch | 3485 ++++++++++++++++++++++++++++++++++++++ sysSentry.spec | 21 +- 2 files changed, 3504 insertions(+), 2 deletions(-) create mode 100644 add-ebpf-collector.patch diff --git a/add-ebpf-collector.patch b/add-ebpf-collector.patch new file mode 100644 index 0000000..189047a --- /dev/null +++ b/add-ebpf-collector.patch @@ -0,0 +1,3485 @@ +From 02b1cb18cac548d92c809111914d1208cfe313a8 Mon Sep 17 00:00:00 2001 +From: zhangnan +Date: Fri, 27 Sep 2024 11:36:41 +0800 +Subject: [PATCH] add ebpf collector + +--- + src/c/ebpf_collector/Makefile | 101 ++ + src/c/ebpf_collector/bpf_helpers.h | 535 +++++++ + src/c/ebpf_collector/bpf_load.c | 709 +++++++++ + src/c/ebpf_collector/ebpf_collector.bpf.c | 1408 +++++++++++++++++ + src/c/ebpf_collector/ebpf_collector.c | 274 ++++ + src/c/ebpf_collector/ebpf_collector.h | 77 + + src/python/sentryCollector/collect_io.py | 241 ++- + .../avg_block_io/avg_block_io.py | 4 +- + 8 files changed, 3332 insertions(+), 17 deletions(-) + create mode 100644 src/c/ebpf_collector/Makefile + create mode 100644 src/c/ebpf_collector/bpf_helpers.h + create mode 100644 src/c/ebpf_collector/bpf_load.c + create mode 100644 src/c/ebpf_collector/ebpf_collector.bpf.c + create mode 100644 src/c/ebpf_collector/ebpf_collector.c + create mode 100644 src/c/ebpf_collector/ebpf_collector.h + +diff --git a/src/c/ebpf_collector/Makefile b/src/c/ebpf_collector/Makefile +new file mode 100644 +index 0000000..210d95b +--- /dev/null ++++ b/src/c/ebpf_collector/Makefile +@@ -0,0 +1,101 @@ ++# Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. ++# Description: ebpf collector program ++ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ ++ | sed 's/arm.*/arm/' \ ++ | sed 's/aarch64/arm64/' \ ++ | sed 's/ppc64le/powerpc/' \ ++ | sed 's/mips.*/mips/' \ ++ | sed 's/riscv64/riscv/' \ ++ | sed 's/loongarch64/loongarch/') ++ ++KERNEL_VERSION ?= $(shell uname -r) ++KERNEL_SRC := /usr/src/kernels/$(KERNEL_VERSION) ++KERNEL_PATH := /usr/src/linux-$(shell rpm -qa kernel-source | cut -d' ' -f1 | sed 's/kernel-source-//') ++GCC_ARCH ?= $(shell gcc -dumpmachine) ++GCC_VERSION ?= $(shell gcc -dumpversion) ++ ++LINUX_INCLUDE := -I$(KERNEL_SRC)/include/ ++LINUX_INCLUDE += -I$(KERNEL_SRC)/arch/$(ARCH)/include/ ++LINUX_INCLUDE += -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated ++LINUX_INCLUDE += -I$(KERNEL_SRC)/arch/$(ARCH)/include/uapi ++LINUX_INCLUDE += -I$(KERNEL_SRC)/arch/$(ARCH)/include/uapi/linux ++LINUX_INCLUDE += -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated/uapi ++LINUX_INCLUDE += -I$(KERNEL_SRC)/include/uapi ++LINUX_INCLUDE += -I$(KERNEL_SRC)/include/generated/uapi ++LINUX_INCLUDE += -include $(KERNEL_SRC)/include/linux/kconfig.h ++LINUX_INCLUDE += -I$(KERNEL_PATH)/samples/bpf ++LINUX_INCLUDE += -I$(KERNEL_SRC)/tools/lib/ ++LINUX_INCLUDE += -I/usr/src/kernels/$(KERNEL_VERSION)/samples/bpf ++LINUX_INCLUDE += -I$(KERNEL_SRC)/tools/perf/include/bpf ++LINUX_INCLUDE += -I/usr/include/libbpf/src/bpf ++LINUX_INCLUDE += -I/usr/src/kernels/$(KERNEL_VERSION)/include/uapi/linux/ ++LINUX_INCLUDE += -I/usr/include/bpf/ ++LINUX_INCLUDE += -I/usr/include/ ++BPF_LOAD_INCLUDE := -I/usr/include ++BPF_LOAD_INCLUDE += -I$(KERNEL_SRC)/include/ ++BPF_LOAD_INCLUDE += -I/usr/src/kernels/$(KERNEL_VERSION)/include/ ++KBUILD_HOSTCFLAGS := -I$(KERNEL_PATH)/include/ ++KBUILD_HOSTCFLAGS += -I$(KERNEL_PATH)/tools/lib/ 
-I$(KERNEL_PATH)/tools/include ++KBUILD_HOSTCFLAGS += -I$(KERNEL_PATH)/tools/perf ++NOSTDINC_FLAGS := -nostdinc ++EXTRA_CFLAGS := -isystem /usr/lib/gcc/$(GCC_ARCH)/$(GCC_VERSION)/include ++CFLAGS := -g -Wall -w ++ ++CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \ ++ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') ++ ++APPS = ebpf_collector ++ ++CC = gcc ++LLC ?= llc ++CLANG ?= clang ++ ++USER_CFLAGS = -I. -I/usr/src/kernels/$(KERNEL_VERSION)/include/uapi/linux/ -I/usr/src/kernel/include -Wall ++KERNEL_CFLAGS = -I. -I/usr/src/kernels/$(KERNEL_VERSION)/include/uapi/linux/ -Wall ++LOADER_CFLAGS = -I. -I/usr/src/kernels/$(KERNEL_VERSION)/include/uapi/linux/ -I/usr/src/kernel/include ++CLANG_FLAGS = -O2 -emit-llvm -c ++LLC_FLAGS = -march=bpf -filetype=obj ++ ++OUTPUT := output ++ ++.PHONY: all ++all: $(APPS) ++ ++.PHONY: clean ++clean: ++ $(call msg,CLEAN) ++ $(Q)rm -rf $(OUTPUT) $(APPS) ++ ++$(OUTPUT): ++ $(call msg,MKDIR,$@) ++ $(Q)mkdir -p $@ ++ ++$(OUTPUT)/%.bpf.o: %.bpf.c ++ $(call msg,BPF,$@) ++ $(CLANG) $(NOSTDINC_FLAGS) $(EXTRA_CFLAGS) $(LINUX_INCLUDE) $(KBUILD_HOSTCFLAGS) \ ++ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ ++ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ ++ -Wno-gnu-variable-sized-type-not-at-end \ ++ -Wno-address-of-packed-member -Wno-tautological-compare \ ++ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ ++ -O2 -emit-llvm -c $< -o -| $(LLC) $(LLC_FLAGS) -o $@ ++ ++$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.bpf.o ++ ++$(OUTPUT)/bpf_load.o: bpf_load.c | $(OUTPUT) ++ $(call msg,CC,$@) ++ $(CC) $(NOSTDINC_FLAGS) $(EXTRA_CFLAGS) $(CFLAGS) -I$(KERNEL_PATH)/samples/bpf -I$(KERNEL_PATH)/tools/perf $(BPF_LOAD_INCLUDE) \ ++ -I$(KERNEL_PATH)/tools/lib/ -I$(KERNEL_PATH)/tools/include \ ++ -c $(filter %.c,$^) -o $@ ++ ++$(OUTPUT)/%.o: %.c | $(OUTPUT) ++ $(call msg,CC,$@) ++ $(CC) $(CFLAGS) $(INCLUDES) -I$(KERNEL_PATH)/samples/bpf -c $(filter %.c,$^) -o $@ ++ ++$(APPS): %: $(OUTPUT)/%.o $(OUTPUT)/bpf_load.o | $(OUTPUT) ++ $(call msg,BINARY,$@) ++ $(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -I$(KERNEL_PATH)/samples/bpf -lelf -lbpf -lz -o $@ ++ ++.DELETE_ON_ERROR: ++ ++.SECONDARY: +diff --git a/src/c/ebpf_collector/bpf_helpers.h b/src/c/ebpf_collector/bpf_helpers.h +new file mode 100644 +index 0000000..352965a +--- /dev/null ++++ b/src/c/ebpf_collector/bpf_helpers.h +@@ -0,0 +1,535 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#ifndef __BPF_HELPERS__ ++#define __BPF_HELPERS__ ++ ++#define __uint(name, val) int (*name)[val] ++#define __type(name, val) val *name ++ ++/* helper macro to print out debug messages */ ++#define bpf_printk(fmt, ...) \ ++({ \ ++ char ____fmt[] = fmt; \ ++ bpf_trace_printk(____fmt, sizeof(____fmt), \ ++ ##__VA_ARGS__); \ ++}) ++ ++#ifdef __clang__ ++ ++/* helper macro to place programs, maps, license in ++ * different sections in elf_bpf file. 
Section names ++ * are interpreted by elf_bpf loader ++ */ ++#define SEC(NAME) __attribute__((section(NAME), used)) ++ ++/* helper functions called from eBPF programs written in C */ ++static void *(*bpf_map_lookup_elem)(void *map, const void *key) = ++ (void *) BPF_FUNC_map_lookup_elem; ++static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_map_update_elem; ++static int (*bpf_map_delete_elem)(void *map, const void *key) = ++ (void *) BPF_FUNC_map_delete_elem; ++static int (*bpf_map_push_elem)(void *map, const void *value, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_map_push_elem; ++static int (*bpf_map_pop_elem)(void *map, void *value) = ++ (void *) BPF_FUNC_map_pop_elem; ++static int (*bpf_map_peek_elem)(void *map, void *value) = ++ (void *) BPF_FUNC_map_peek_elem; ++static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) = ++ (void *) BPF_FUNC_probe_read; ++static unsigned long long (*bpf_ktime_get_ns)(void) = ++ (void *) BPF_FUNC_ktime_get_ns; ++static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = ++ (void *) BPF_FUNC_trace_printk; ++static void (*bpf_tail_call)(void *ctx, void *map, int index) = ++ (void *) BPF_FUNC_tail_call; ++static unsigned long long (*bpf_get_smp_processor_id)(void) = ++ (void *) BPF_FUNC_get_smp_processor_id; ++static unsigned long long (*bpf_get_current_pid_tgid)(void) = ++ (void *) BPF_FUNC_get_current_pid_tgid; ++static unsigned long long (*bpf_get_current_uid_gid)(void) = ++ (void *) BPF_FUNC_get_current_uid_gid; ++static int (*bpf_get_current_comm)(void *buf, int buf_size) = ++ (void *) BPF_FUNC_get_current_comm; ++static unsigned long long (*bpf_perf_event_read)(void *map, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_perf_event_read; ++static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = ++ (void *) BPF_FUNC_clone_redirect; ++static int (*bpf_redirect)(int ifindex, int flags) = ++ (void *) BPF_FUNC_redirect; ++static int (*bpf_redirect_map)(void *map, int key, int flags) = ++ (void *) BPF_FUNC_redirect_map; ++static int (*bpf_perf_event_output)(void *ctx, void *map, ++ unsigned long long flags, void *data, ++ int size) = ++ (void *) BPF_FUNC_perf_event_output; ++static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = ++ (void *) BPF_FUNC_get_stackid; ++static int (*bpf_probe_write_user)(void *dst, const void *src, int size) = ++ (void *) BPF_FUNC_probe_write_user; ++static int (*bpf_current_task_under_cgroup)(void *map, int index) = ++ (void *) BPF_FUNC_current_task_under_cgroup; ++static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = ++ (void *) BPF_FUNC_skb_get_tunnel_key; ++static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = ++ (void *) BPF_FUNC_skb_set_tunnel_key; ++static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = ++ (void *) BPF_FUNC_skb_get_tunnel_opt; ++static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = ++ (void *) BPF_FUNC_skb_set_tunnel_opt; ++static unsigned long long (*bpf_get_prandom_u32)(void) = ++ (void *) BPF_FUNC_get_prandom_u32; ++static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = ++ (void *) BPF_FUNC_xdp_adjust_head; ++static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = ++ (void *) BPF_FUNC_xdp_adjust_meta; ++static int (*bpf_get_socket_cookie)(void *ctx) = ++ (void *) BPF_FUNC_get_socket_cookie; ++static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, 
++ int optlen) = ++ (void *) BPF_FUNC_setsockopt; ++static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, ++ int optlen) = ++ (void *) BPF_FUNC_getsockopt; ++static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = ++ (void *) BPF_FUNC_sock_ops_cb_flags_set; ++static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = ++ (void *) BPF_FUNC_sk_redirect_map; ++static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) = ++ (void *) BPF_FUNC_sk_redirect_hash; ++static int (*bpf_sock_map_update)(void *map, void *key, void *value, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_sock_map_update; ++static int (*bpf_sock_hash_update)(void *map, void *key, void *value, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_sock_hash_update; ++static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, ++ void *buf, unsigned int buf_size) = ++ (void *) BPF_FUNC_perf_event_read_value; ++static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, ++ unsigned int buf_size) = ++ (void *) BPF_FUNC_perf_prog_read_value; ++static int (*bpf_override_return)(void *ctx, unsigned long rc) = ++ (void *) BPF_FUNC_override_return; ++static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = ++ (void *) BPF_FUNC_msg_redirect_map; ++static int (*bpf_msg_redirect_hash)(void *ctx, ++ void *map, void *key, int flags) = ++ (void *) BPF_FUNC_msg_redirect_hash; ++static int (*bpf_msg_apply_bytes)(void *ctx, int len) = ++ (void *) BPF_FUNC_msg_apply_bytes; ++static int (*bpf_msg_cork_bytes)(void *ctx, int len) = ++ (void *) BPF_FUNC_msg_cork_bytes; ++static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = ++ (void *) BPF_FUNC_msg_pull_data; ++static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = ++ (void *) BPF_FUNC_msg_push_data; ++static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = ++ (void *) BPF_FUNC_msg_pop_data; ++static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = ++ (void *) BPF_FUNC_bind; ++static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = ++ (void *) BPF_FUNC_xdp_adjust_tail; ++static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, ++ int size, int flags) = ++ (void *) BPF_FUNC_skb_get_xfrm_state; ++static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) = ++ (void *) BPF_FUNC_sk_select_reuseport; ++static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = ++ (void *) BPF_FUNC_get_stack; ++static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, ++ int plen, __u32 flags) = ++ (void *) BPF_FUNC_fib_lookup; ++static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr, ++ unsigned int len) = ++ (void *) BPF_FUNC_lwt_push_encap; ++static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset, ++ void *from, unsigned int len) = ++ (void *) BPF_FUNC_lwt_seg6_store_bytes; ++static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param, ++ unsigned int param_len) = ++ (void *) BPF_FUNC_lwt_seg6_action; ++static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset, ++ unsigned int len) = ++ (void *) BPF_FUNC_lwt_seg6_adjust_srh; ++static int (*bpf_rc_repeat)(void *ctx) = ++ (void *) BPF_FUNC_rc_repeat; ++static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol, ++ unsigned long long scancode, unsigned int toggle) = ++ (void *) BPF_FUNC_rc_keydown; ++static unsigned long long (*bpf_get_current_cgroup_id)(void) 
= ++ (void *) BPF_FUNC_get_current_cgroup_id; ++static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) = ++ (void *) BPF_FUNC_get_local_storage; ++static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = ++ (void *) BPF_FUNC_skb_cgroup_id; ++static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = ++ (void *) BPF_FUNC_skb_ancestor_cgroup_id; ++static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, ++ struct bpf_sock_tuple *tuple, ++ int size, unsigned long long netns_id, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_sk_lookup_tcp; ++static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, ++ struct bpf_sock_tuple *tuple, ++ int size, unsigned long long netns_id, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_skc_lookup_tcp; ++static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, ++ struct bpf_sock_tuple *tuple, ++ int size, unsigned long long netns_id, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_sk_lookup_udp; ++static int (*bpf_sk_release)(struct bpf_sock *sk) = ++ (void *) BPF_FUNC_sk_release; ++static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = ++ (void *) BPF_FUNC_skb_vlan_push; ++static int (*bpf_skb_vlan_pop)(void *ctx) = ++ (void *) BPF_FUNC_skb_vlan_pop; ++static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = ++ (void *) BPF_FUNC_rc_pointer_rel; ++static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = ++ (void *) BPF_FUNC_spin_lock; ++static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = ++ (void *) BPF_FUNC_spin_unlock; ++static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = ++ (void *) BPF_FUNC_sk_fullsock; ++static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = ++ (void *) BPF_FUNC_tcp_sock; ++static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = ++ (void *) BPF_FUNC_get_listener_sock; ++static int (*bpf_skb_ecn_set_ce)(void *ctx) = ++ (void *) BPF_FUNC_skb_ecn_set_ce; ++static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, ++ void *ip, int ip_len, void *tcp, int tcp_len) = ++ (void *) BPF_FUNC_tcp_check_syncookie; ++static int (*bpf_sysctl_get_name)(void *ctx, char *buf, ++ unsigned long long buf_len, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_sysctl_get_name; ++static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf, ++ unsigned long long buf_len) = ++ (void *) BPF_FUNC_sysctl_get_current_value; ++static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf, ++ unsigned long long buf_len) = ++ (void *) BPF_FUNC_sysctl_get_new_value; ++static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf, ++ unsigned long long buf_len) = ++ (void *) BPF_FUNC_sysctl_set_new_value; ++static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, ++ unsigned long long flags, long *res) = ++ (void *) BPF_FUNC_strtol; ++static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, ++ unsigned long long flags, unsigned long *res) = ++ (void *) BPF_FUNC_strtoul; ++static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, ++ void *value, __u64 flags) = ++ (void *) BPF_FUNC_sk_storage_get; ++static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = ++ (void *)BPF_FUNC_sk_storage_delete; ++static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal; ++static long long (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *ip, ++ int ip_len, void *tcp, int tcp_len) = ++ (void *) BPF_FUNC_tcp_gen_syncookie; ++ ++/* llvm builtin functions that eBPF C program may 
use to ++ * emit BPF_LD_ABS and BPF_LD_IND instructions ++ */ ++struct sk_buff; ++unsigned long long load_byte(void *skb, ++ unsigned long long off) asm("llvm.bpf.load.byte"); ++unsigned long long load_half(void *skb, ++ unsigned long long off) asm("llvm.bpf.load.half"); ++unsigned long long load_word(void *skb, ++ unsigned long long off) asm("llvm.bpf.load.word"); ++ ++/* a helper structure used by eBPF C program ++ * to describe map attributes to elf_bpf loader ++ */ ++struct bpf_map_def { ++ unsigned int type; ++ unsigned int key_size; ++ unsigned int value_size; ++ unsigned int max_entries; ++ unsigned int map_flags; ++ unsigned int inner_map_idx; ++ unsigned int numa_node; ++}; ++ ++#else ++ ++#include <bpf-helpers.h> ++ ++#endif ++ ++#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ ++ struct ____btf_map_##name { \ ++ type_key key; \ ++ type_val value; \ ++ }; \ ++ struct ____btf_map_##name \ ++ __attribute__ ((section(".maps." #name), used)) \ ++ ____btf_map_##name = { } ++ ++static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = ++ (void *) BPF_FUNC_skb_load_bytes; ++static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) = ++ (void *) BPF_FUNC_skb_load_bytes_relative; ++static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = ++ (void *) BPF_FUNC_skb_store_bytes; ++static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = ++ (void *) BPF_FUNC_l3_csum_replace; ++static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = ++ (void *) BPF_FUNC_l4_csum_replace; ++static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = ++ (void *) BPF_FUNC_csum_diff; ++static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = ++ (void *) BPF_FUNC_skb_under_cgroup; ++static int (*bpf_skb_change_head)(void *, int len, int flags) = ++ (void *) BPF_FUNC_skb_change_head; ++static int (*bpf_skb_pull_data)(void *, int len) = ++ (void *) BPF_FUNC_skb_pull_data; ++static unsigned int (*bpf_get_cgroup_classid)(void *ctx) = ++ (void *) BPF_FUNC_get_cgroup_classid; ++static unsigned int (*bpf_get_route_realm)(void *ctx) = ++ (void *) BPF_FUNC_get_route_realm; ++static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) = ++ (void *) BPF_FUNC_skb_change_proto; ++static int (*bpf_skb_change_type)(void *ctx, __u32 type) = ++ (void *) BPF_FUNC_skb_change_type; ++static unsigned int (*bpf_get_hash_recalc)(void *ctx) = ++ (void *) BPF_FUNC_get_hash_recalc; ++static unsigned long long (*bpf_get_current_task)(void) = ++ (void *) BPF_FUNC_get_current_task; ++static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) = ++ (void *) BPF_FUNC_skb_change_tail; ++static long long (*bpf_csum_update)(void *ctx, __u32 csum) = ++ (void *) BPF_FUNC_csum_update; ++static void (*bpf_set_hash_invalid)(void *ctx) = ++ (void *) BPF_FUNC_set_hash_invalid; ++static int (*bpf_get_numa_node_id)(void) = ++ (void *) BPF_FUNC_get_numa_node_id; ++static int (*bpf_probe_read_str)(void *ctx, __u32 size, ++ const void *unsafe_ptr) = ++ (void *) BPF_FUNC_probe_read_str; ++static unsigned int (*bpf_get_socket_uid)(void *ctx) = ++ (void *) BPF_FUNC_get_socket_uid; ++static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) = ++ (void *) BPF_FUNC_set_hash; ++static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, ++ unsigned long long flags) = ++ (void *) BPF_FUNC_skb_adjust_room; ++ ++/* Scan the ARCH passed in from ARCH env 
variable (see Makefile) */ ++#if defined(__TARGET_ARCH_x86) ++ #define bpf_target_x86 ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_s390) ++ #define bpf_target_s390 ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_arm) ++ #define bpf_target_arm ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_arm64) ++ #define bpf_target_arm64 ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_mips) ++ #define bpf_target_mips ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_powerpc) ++ #define bpf_target_powerpc ++ #define bpf_target_defined ++#elif defined(__TARGET_ARCH_sparc) ++ #define bpf_target_sparc ++ #define bpf_target_defined ++#else ++ #undef bpf_target_defined ++#endif ++ ++/* Fall back to what the compiler says */ ++#ifndef bpf_target_defined ++#if defined(__x86_64__) ++ #define bpf_target_x86 ++#elif defined(__s390__) ++ #define bpf_target_s390 ++#elif defined(__arm__) ++ #define bpf_target_arm ++#elif defined(__aarch64__) ++ #define bpf_target_arm64 ++#elif defined(__mips__) ++ #define bpf_target_mips ++#elif defined(__powerpc__) ++ #define bpf_target_powerpc ++#elif defined(__sparc__) ++ #define bpf_target_sparc ++#endif ++#endif ++ ++#if defined(bpf_target_x86) ++ ++#ifdef __KERNEL__ ++#define PT_REGS_PARM1(x) ((x)->di) ++#define PT_REGS_PARM2(x) ((x)->si) ++#define PT_REGS_PARM3(x) ((x)->dx) ++#define PT_REGS_PARM4(x) ((x)->cx) ++#define PT_REGS_PARM5(x) ((x)->r8) ++#define PT_REGS_RET(x) ((x)->sp) ++#define PT_REGS_FP(x) ((x)->bp) ++#define PT_REGS_RC(x) ((x)->ax) ++#define PT_REGS_SP(x) ((x)->sp) ++#define PT_REGS_IP(x) ((x)->ip) ++#else ++#ifdef __i386__ ++/* i386 kernel is built with -mregparm=3 */ ++#define PT_REGS_PARM1(x) ((x)->eax) ++#define PT_REGS_PARM2(x) ((x)->edx) ++#define PT_REGS_PARM3(x) ((x)->ecx) ++#define PT_REGS_PARM4(x) 0 ++#define PT_REGS_PARM5(x) 0 ++#define PT_REGS_RET(x) ((x)->esp) ++#define PT_REGS_FP(x) ((x)->ebp) ++#define PT_REGS_RC(x) ((x)->eax) ++#define PT_REGS_SP(x) ((x)->esp) ++#define PT_REGS_IP(x) ((x)->eip) ++#else ++#define PT_REGS_PARM1(x) ((x)->rdi) ++#define PT_REGS_PARM2(x) ((x)->rsi) ++#define PT_REGS_PARM3(x) ((x)->rdx) ++#define PT_REGS_PARM4(x) ((x)->rcx) ++#define PT_REGS_PARM5(x) ((x)->r8) ++#define PT_REGS_RET(x) ((x)->rsp) ++#define PT_REGS_FP(x) ((x)->rbp) ++#define PT_REGS_RC(x) ((x)->rax) ++#define PT_REGS_SP(x) ((x)->rsp) ++#define PT_REGS_IP(x) ((x)->rip) ++#endif ++#endif ++ ++#elif defined(bpf_target_s390) ++ ++/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ ++struct pt_regs; ++#define PT_REGS_S390 const volatile user_pt_regs ++#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) ++#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) ++#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) ++#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) ++#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) ++#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) ++/* Works only with CONFIG_FRAME_POINTER */ ++#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) ++#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) ++#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) ++#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) ++ ++#elif defined(bpf_target_arm) ++ ++#define PT_REGS_PARM1(x) ((x)->uregs[0]) ++#define PT_REGS_PARM2(x) ((x)->uregs[1]) ++#define PT_REGS_PARM3(x) ((x)->uregs[2]) ++#define PT_REGS_PARM4(x) ((x)->uregs[3]) ++#define PT_REGS_PARM5(x) ((x)->uregs[4]) ++#define PT_REGS_RET(x) ((x)->uregs[14]) ++#define 
PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ ++#define PT_REGS_RC(x) ((x)->uregs[0]) ++#define PT_REGS_SP(x) ((x)->uregs[13]) ++#define PT_REGS_IP(x) ((x)->uregs[12]) ++ ++#elif defined(bpf_target_arm64) ++ ++/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ ++struct pt_regs; ++#define PT_REGS_ARM64 const volatile struct user_pt_regs ++#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) ++#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) ++#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) ++#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) ++#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) ++#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) ++/* Works only with CONFIG_FRAME_POINTER */ ++#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) ++#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) ++#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) ++#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) ++ ++#elif defined(bpf_target_mips) ++ ++#define PT_REGS_PARM1(x) ((x)->regs[4]) ++#define PT_REGS_PARM2(x) ((x)->regs[5]) ++#define PT_REGS_PARM3(x) ((x)->regs[6]) ++#define PT_REGS_PARM4(x) ((x)->regs[7]) ++#define PT_REGS_PARM5(x) ((x)->regs[8]) ++#define PT_REGS_RET(x) ((x)->regs[31]) ++#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ ++#define PT_REGS_RC(x) ((x)->regs[1]) ++#define PT_REGS_SP(x) ((x)->regs[29]) ++#define PT_REGS_IP(x) ((x)->cp0_epc) ++ ++#elif defined(bpf_target_powerpc) ++ ++#define PT_REGS_PARM1(x) ((x)->gpr[3]) ++#define PT_REGS_PARM2(x) ((x)->gpr[4]) ++#define PT_REGS_PARM3(x) ((x)->gpr[5]) ++#define PT_REGS_PARM4(x) ((x)->gpr[6]) ++#define PT_REGS_PARM5(x) ((x)->gpr[7]) ++#define PT_REGS_RC(x) ((x)->gpr[3]) ++#define PT_REGS_SP(x) ((x)->sp) ++#define PT_REGS_IP(x) ((x)->nip) ++ ++#elif defined(bpf_target_sparc) ++ ++#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) ++#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) ++#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) ++#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) ++#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) ++#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) ++#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) ++#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) ++ ++/* Should this also be a bpf_target check for the sparc case? */ ++#if defined(__arch64__) ++#define PT_REGS_IP(x) ((x)->tpc) ++#else ++#define PT_REGS_IP(x) ((x)->pc) ++#endif ++ ++#endif ++ ++#if defined(bpf_target_powerpc) ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) ++#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP ++#elif defined(bpf_target_sparc) ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) ++#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP ++#else ++#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ ++ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) ++#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ ++ bpf_probe_read(&(ip), sizeof(ip), \ ++ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) ++#endif ++ ++/* ++ * BPF_CORE_READ abstracts away bpf_probe_read() call and captures offset ++ * relocation for source address using __builtin_preserve_access_index() ++ * built-in, provided by Clang. ++ * ++ * __builtin_preserve_access_index() takes as an argument an expression of ++ * taking an address of a field within struct/union. 
It makes compiler emit ++ * a relocation, which records BTF type ID describing root struct/union and an ++ * accessor string which describes exact embedded field that was used to take ++ * an address. See detailed description of this relocation format and ++ * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h. ++ * ++ * This relocation allows libbpf to adjust BPF instruction to use correct ++ * actual field offset, based on target kernel BTF type that matches original ++ * (local) BTF, used to record relocation. ++ */ ++#define BPF_CORE_READ(dst, src) \ ++ bpf_probe_read((dst), sizeof(*(src)), \ ++ __builtin_preserve_access_index(src)) ++ ++#endif +diff --git a/src/c/ebpf_collector/bpf_load.c b/src/c/ebpf_collector/bpf_load.c +new file mode 100644 +index 0000000..db33eb1 +--- /dev/null ++++ b/src/c/ebpf_collector/bpf_load.c +@@ -0,0 +1,709 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include <stdio.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++#include <libelf.h> ++#include <gelf.h> ++#include <errno.h> ++#include <unistd.h> ++#include <string.h> ++#include <stdbool.h> ++#include <stdlib.h> ++#include <linux/bpf.h> ++#include <linux/filter.h> ++#include <linux/perf_event.h> ++#include <linux/netlink.h> ++#include <linux/rtnetlink.h> ++#include <linux/types.h> ++#include <sys/socket.h> ++#include <sys/syscall.h> ++#include <sys/ioctl.h> ++#include <sys/mman.h> ++#include <poll.h> ++#include <ctype.h> ++#include <assert.h> ++#include <bpf/bpf.h> ++#include <bpf/libbpf.h> ++#include <bpf_load.h> ++#include <perf-sys.h> ++ ++#define DEBUGFS "/sys/kernel/debug/tracing/" ++ ++static char license[128]; ++static int kern_version; ++static bool processed_sec[128]; ++char bpf_log_buf[BPF_LOG_BUF_SIZE]; ++int map_fd[MAX_MAPS]; ++int prog_fd[MAX_PROGS]; ++int event_fd[MAX_PROGS]; ++int prog_cnt; ++int prog_array_fd = -1; ++ ++struct bpf_map_data map_data[MAX_MAPS]; ++int map_data_count = 0; ++ ++static int populate_prog_array(const char *event, int prog_fd) ++{ ++ int ind = atoi(event), err; ++ ++ err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); ++ if (err < 0) { ++ printf("failed to store prog_fd in prog_array\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++static int write_kprobe_events(const char *val) ++{ ++ int fd, ret, flags; ++ ++ if (val == NULL) ++ return -1; ++ else if (val[0] == '\0') ++ flags = O_WRONLY | O_TRUNC; ++ else ++ flags = O_WRONLY | O_APPEND; ++ ++ fd = open("/sys/kernel/debug/tracing/kprobe_events", flags); ++ ++ ret = write(fd, val, strlen(val)); ++ close(fd); ++ ++ return ret; ++} ++ ++static int load_and_attach(const char *event, struct bpf_insn *prog, int size) ++{ ++ bool is_socket = strncmp(event, "socket", 6) == 0; ++ bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; ++ bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; ++ bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; ++ bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0; ++ bool is_xdp = strncmp(event, "xdp", 3) == 0; ++ bool is_perf_event = strncmp(event, "perf_event", 10) == 0; ++ bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; ++ bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; ++ bool is_sockops = strncmp(event, "sockops", 7) == 0; ++ bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; ++ bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0; ++ size_t insns_cnt = size / sizeof(struct bpf_insn); ++ enum bpf_prog_type prog_type; ++ char buf[256]; ++ int fd, efd, err, id; ++ struct perf_event_attr attr = {}; ++ ++ attr.type = PERF_TYPE_TRACEPOINT; ++ attr.sample_type = PERF_SAMPLE_RAW; ++ attr.sample_period = 1; ++ attr.wakeup_events = 1; ++ ++ if (is_socket) { ++ prog_type = BPF_PROG_TYPE_SOCKET_FILTER; ++ } else if (is_kprobe || is_kretprobe) { ++ prog_type = BPF_PROG_TYPE_KPROBE; ++ } else if (is_tracepoint) { ++ prog_type = BPF_PROG_TYPE_TRACEPOINT; ++ } 
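++ /* each remaining section-name prefix selects the matching BPF program type */ ++ 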
else if (is_raw_tracepoint) { ++ prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT; ++ } else if (is_xdp) { ++ prog_type = BPF_PROG_TYPE_XDP; ++ } else if (is_perf_event) { ++ prog_type = BPF_PROG_TYPE_PERF_EVENT; ++ } else if (is_cgroup_skb) { ++ prog_type = BPF_PROG_TYPE_CGROUP_SKB; ++ } else if (is_cgroup_sk) { ++ prog_type = BPF_PROG_TYPE_CGROUP_SOCK; ++ } else if (is_sockops) { ++ prog_type = BPF_PROG_TYPE_SOCK_OPS; ++ } else if (is_sk_skb) { ++ prog_type = BPF_PROG_TYPE_SK_SKB; ++ } else if (is_sk_msg) { ++ prog_type = BPF_PROG_TYPE_SK_MSG; ++ } else { ++ printf("Unknown event '%s'\n", event); ++ return -1; ++ } ++ ++ if (prog_cnt == MAX_PROGS) ++ return -1; ++ ++ fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, ++ bpf_log_buf, BPF_LOG_BUF_SIZE); ++ if (fd < 0) { ++ printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); ++ return -1; ++ } ++ ++ prog_fd[prog_cnt++] = fd; ++ ++ if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) ++ return 0; ++ ++ if (is_socket || is_sockops || is_sk_skb || is_sk_msg) { ++ if (is_socket) ++ event += 6; ++ else ++ event += 7; ++ if (*event != '/') ++ return 0; ++ event++; ++ if (!isdigit(*event)) { ++ printf("invalid prog number\n"); ++ return -1; ++ } ++ return populate_prog_array(event, fd); ++ } ++ ++ if (is_raw_tracepoint) { ++ efd = bpf_raw_tracepoint_open(event + 15, fd); ++ if (efd < 0) { ++ printf("tracepoint %s %s\n", event + 15, strerror(errno)); ++ return -1; ++ } ++ event_fd[prog_cnt - 1] = efd; ++ return 0; ++ } ++ ++ if (is_kprobe || is_kretprobe) { ++ bool need_normal_check = true; ++ const char *event_prefix = ""; ++ ++ if (is_kprobe) ++ event += 7; ++ else ++ event += 10; ++ ++ if (*event == 0) { ++ printf("event name cannot be empty\n"); ++ return -1; ++ } ++ ++ if (isdigit(*event)) ++ return populate_prog_array(event, fd); ++ ++#ifdef __x86_64__ ++ if (strncmp(event, "sys_", 4) == 0) { ++ snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s", ++ is_kprobe ? 'p' : 'r', event, event); ++ err = write_kprobe_events(buf); ++ if (err >= 0) { ++ need_normal_check = false; ++ event_prefix = "__x64_"; ++ } ++ } ++#endif ++ if (need_normal_check) { ++ if (strcmp("wbt_wait", event) == 0 || strcmp("blk_mq_get_tag", event) == 0) { ++ if (is_kprobe) { ++ snprintf(buf, sizeof(buf), "%c:%s_1 %s", ++ is_kprobe ? 'p' : 'r', event, event); ++ } ++ else { ++ snprintf(buf, sizeof(buf), "%c:%s_2 %s", ++ is_kprobe ? 'p' : 'r', event, event); ++ } ++ } ++ else { ++ snprintf(buf, sizeof(buf), "%c:%s %s", ++ is_kprobe ? 
'p' : 'r', event, event); ++ } ++ err = write_kprobe_events(buf); ++ if (err < 0) { ++ printf("failed to create kprobe '%s' error '%s'\n", ++ event, strerror(errno)); ++ return -1; ++ } ++ } ++ ++ strcpy(buf, DEBUGFS); ++ strcat(buf, "events/kprobes/"); ++ strcat(buf, event_prefix); ++ strcat(buf, event); ++ ++ if (strcmp("wbt_wait", event) == 0 || strcmp("blk_mq_get_tag", event) == 0) { ++ if (is_kprobe) { ++ strcat(buf, "_1"); ++ } ++ else { ++ strcat(buf, "_2"); ++ } ++ } ++ strcat(buf, "/id"); ++ } else if (is_tracepoint) { ++ event += 11; ++ ++ if (*event == 0) { ++ printf("event name cannot be empty\n"); ++ return -1; ++ } ++ strcpy(buf, DEBUGFS); ++ strcat(buf, "events/"); ++ strcat(buf, event); ++ strcat(buf, "/id"); ++ } ++ ++ efd = open(buf, O_RDONLY, 0); ++ if (efd < 0) { ++ printf("failed to open event %s\n", event); ++ return -1; ++ } ++ ++ err = read(efd, buf, sizeof(buf)); ++ if (err < 0 || err >= sizeof(buf)) { ++ printf("read from '%s' failed '%s'\n", event, strerror(errno)); ++ return -1; ++ } ++ ++ close(efd); ++ ++ buf[err] = 0; ++ id = atoi(buf); ++ attr.config = id; ++ ++ efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); ++ if (efd < 0) { ++ printf("event %d fd %d err %s\n", id, efd, strerror(errno)); ++ return -1; ++ } ++ event_fd[prog_cnt - 1] = efd; ++ err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); ++ if (err < 0) { ++ printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", ++ strerror(errno)); ++ return -1; ++ } ++ err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); ++ if (err < 0) { ++ printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", ++ strerror(errno)); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int load_maps(struct bpf_map_data *maps, int nr_maps, ++ fixup_map_cb fixup_map) ++{ ++ int i, numa_node; ++ ++ for (i = 0; i < nr_maps; i++) { ++ if (fixup_map) { ++ fixup_map(&maps[i], i); ++ /* Allow userspace to assign map FD prior to creation */ ++ if (maps[i].fd != -1) { ++ map_fd[i] = maps[i].fd; ++ continue; ++ } ++ } ++ ++ numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? 
++ maps[i].def.numa_node : -1; ++ ++ if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || ++ maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { ++ int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; ++ ++ map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, ++ maps[i].name, ++ maps[i].def.key_size, ++ inner_map_fd, ++ maps[i].def.max_entries, ++ maps[i].def.map_flags, ++ numa_node); ++ } else { ++ map_fd[i] = bpf_create_map_node(maps[i].def.type, ++ maps[i].name, ++ maps[i].def.key_size, ++ maps[i].def.value_size, ++ maps[i].def.max_entries, ++ maps[i].def.map_flags, ++ numa_node); ++ } ++ if (map_fd[i] < 0) { ++ printf("failed to create a map: %d %s\n", ++ errno, strerror(errno)); ++ return 1; ++ } ++ maps[i].fd = map_fd[i]; ++ ++ if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) ++ prog_array_fd = map_fd[i]; ++ } ++ return 0; ++} ++ ++static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, ++ GElf_Shdr *shdr, Elf_Data **data) ++{ ++ Elf_Scn *scn; ++ ++ scn = elf_getscn(elf, i); ++ if (!scn) ++ return 1; ++ ++ if (gelf_getshdr(scn, shdr) != shdr) ++ return 2; ++ ++ *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); ++ if (!*shname || !shdr->sh_size) ++ return 3; ++ ++ *data = elf_getdata(scn, 0); ++ if (!*data || elf_getdata(scn, *data) != NULL) ++ return 4; ++ ++ return 0; ++} ++ ++static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, ++ GElf_Shdr *shdr, struct bpf_insn *insn, ++ struct bpf_map_data *maps, int nr_maps) ++{ ++ int i, nrels; ++ ++ nrels = shdr->sh_size / shdr->sh_entsize; ++ ++ for (i = 0; i < nrels; i++) { ++ GElf_Sym sym; ++ GElf_Rel rel; ++ unsigned int insn_idx; ++ bool match = false; ++ int j, map_idx; ++ ++ gelf_getrel(data, i, &rel); ++ ++ insn_idx = rel.r_offset / sizeof(struct bpf_insn); ++ ++ gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); ++ ++ if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { ++ printf("invalid relo for insn[%d].code 0x%x\n", ++ insn_idx, insn[insn_idx].code); ++ return 1; ++ } ++ insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; ++ ++ /* Match FD relocation against recorded map_data[] offset */ ++ for (map_idx = 0; map_idx < nr_maps; map_idx++) { ++ if (maps[map_idx].elf_offset == sym.st_value) { ++ match = true; ++ break; ++ } ++ } ++ if (match) { ++ insn[insn_idx].imm = maps[map_idx].fd; ++ } else { ++ printf("invalid relo for insn[%d] no map_data match\n", ++ insn_idx); ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static int cmp_symbols(const void *l, const void *r) ++{ ++ const GElf_Sym *lsym = (const GElf_Sym *)l; ++ const GElf_Sym *rsym = (const GElf_Sym *)r; ++ ++ if (lsym->st_value < rsym->st_value) ++ return -1; ++ else if (lsym->st_value > rsym->st_value) ++ return 1; ++ else ++ return 0; ++} ++ ++static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, ++ Elf *elf, Elf_Data *symbols, int strtabidx) ++{ ++ int map_sz_elf, map_sz_copy; ++ bool validate_zero = false; ++ Elf_Data *data_maps; ++ int i, nr_maps; ++ GElf_Sym *sym; ++ Elf_Scn *scn; ++ int copy_sz; ++ ++ if (maps_shndx < 0) ++ return -EINVAL; ++ if (!symbols) ++ return -EINVAL; ++ ++ /* Get data for maps section via elf index */ ++ scn = elf_getscn(elf, maps_shndx); ++ if (scn) ++ data_maps = elf_getdata(scn, NULL); ++ if (!scn || !data_maps) { ++ printf("Failed to get Elf_Data from maps section %d\n", ++ maps_shndx); ++ return -EINVAL; ++ } ++ ++ /* For each map get corresponding symbol table entry */ ++ sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); ++ for (i = 0, nr_maps = 0; i < symbols->d_size / 
sizeof(GElf_Sym); i++) { ++ assert(nr_maps < MAX_MAPS+1); ++ if (!gelf_getsym(symbols, i, &sym[nr_maps])) ++ continue; ++ if (sym[nr_maps].st_shndx != maps_shndx) ++ continue; ++ /* Only count symbols that live in the maps section */ ++ nr_maps++; ++ } ++ ++ /* Align to map_fd[] order, via sort on offset in sym.st_value */ ++ qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); ++ ++ /* Keeping compatible with ELF maps section changes ++ * ------------------------------------------------ ++ * The program size of struct bpf_load_map_def is known by loader ++ * code, but struct stored in ELF file can be different. ++ * ++ * Unfortunately sym[i].st_size is zero. To calculate the ++ * struct size stored in the ELF file, assume all structs have ++ * the same size, and simply divide by the number of map ++ * symbols. ++ */ ++ map_sz_elf = data_maps->d_size / nr_maps; ++ map_sz_copy = sizeof(struct bpf_load_map_def); ++ if (map_sz_elf < map_sz_copy) { ++ /* ++ * Backward compat, loading older ELF file with ++ * smaller struct, keeping remaining bytes zero. ++ */ ++ map_sz_copy = map_sz_elf; ++ } else if (map_sz_elf > map_sz_copy) { ++ /* ++ * Forward compat, loading newer ELF file with larger ++ * struct with unknown features. Assume zero means ++ * feature not used. Thus, validate rest of struct ++ * data is zero. ++ */ ++ validate_zero = true; ++ } ++ ++ /* Memcpy relevant part of ELF maps data to loader maps */ ++ for (i = 0; i < nr_maps; i++) { ++ struct bpf_load_map_def *def; ++ unsigned char *addr, *end; ++ const char *map_name; ++ size_t offset; ++ ++ map_name = elf_strptr(elf, strtabidx, sym[i].st_name); ++ maps[i].name = strdup(map_name); ++ if (!maps[i].name) { ++ printf("strdup(%s): %s(%d)\n", map_name, ++ strerror(errno), errno); ++ free(sym); ++ return -errno; ++ } ++ ++ /* Symbol value is offset into ELF maps section data area */ ++ offset = sym[i].st_value; ++ def = (struct bpf_load_map_def *)(data_maps->d_buf + offset); ++ maps[i].elf_offset = offset; ++ memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def)); ++ memcpy(&maps[i].def, def, map_sz_copy); ++ ++ /* Verify no newer features were requested */ ++ if (validate_zero) { ++ addr = (unsigned char*) def + map_sz_copy; ++ end = (unsigned char*) def + map_sz_elf; ++ for (; addr < end; addr++) { ++ if (*addr != 0) { ++ free(sym); ++ return -EFBIG; ++ } ++ } ++ } ++ } ++ ++ free(sym); ++ return nr_maps; ++} ++ ++static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) ++{ ++ int fd, i, ret, maps_shndx = -1, strtabidx = -1; ++ Elf *elf; ++ GElf_Ehdr ehdr; ++ GElf_Shdr shdr, shdr_prog; ++ Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; ++ char *shname, *shname_prog; ++ int nr_maps = 0; ++ ++ /* reset global variables */ ++ kern_version = 0; ++ memset(license, 0, sizeof(license)); ++ memset(processed_sec, 0, sizeof(processed_sec)); ++ ++ if (elf_version(EV_CURRENT) == EV_NONE) ++ return 1; ++ ++ fd = open(path, O_RDONLY, 0); ++ if (fd < 0) ++ return 1; ++ ++ elf = elf_begin(fd, ELF_C_READ, NULL); ++ ++ if (!elf) ++ return 1; ++ ++ if (gelf_getehdr(elf, &ehdr) != &ehdr) ++ return 1; ++ ++ /* clear all kprobes */ ++ i = write_kprobe_events(""); ++ ++ /* scan over all elf sections to get license and map info */ ++ for (i = 1; i < ehdr.e_shnum; i++) { ++ ++ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) ++ continue; ++ ++ if (0) /* helpful for llvm debugging */ ++ printf("section %d:%s data %p size %zd link %d flags %d\n", ++ i, shname, data->d_buf, data->d_size, ++ shdr.sh_link, (int) shdr.sh_flags); ++ ++ if 
(strcmp(shname, "license") == 0) { ++ processed_sec[i] = true; ++ memcpy(license, data->d_buf, data->d_size); ++ } else if (strcmp(shname, "version") == 0) { ++ processed_sec[i] = true; ++ if (data->d_size != sizeof(int)) { ++ printf("invalid size of version section %zd\n", ++ data->d_size); ++ return 1; ++ } ++ memcpy(&kern_version, data->d_buf, sizeof(int)); ++ } else if (strcmp(shname, "maps") == 0) { ++ int j; ++ ++ maps_shndx = i; ++ data_maps = data; ++ for (j = 0; j < MAX_MAPS; j++) ++ map_data[j].fd = -1; ++ } else if (shdr.sh_type == SHT_SYMTAB) { ++ strtabidx = shdr.sh_link; ++ symbols = data; ++ } ++ } ++ ++ ret = 1; ++ ++ if (!symbols) { ++ printf("missing SHT_SYMTAB section\n"); ++ goto done; ++ } ++ ++ if (data_maps) { ++ nr_maps = load_elf_maps_section(map_data, maps_shndx, ++ elf, symbols, strtabidx); ++ if (nr_maps < 0) { ++ printf("Error: Failed loading ELF maps (errno:%d):%s\n", ++ nr_maps, strerror(-nr_maps)); ++ goto done; ++ } ++ if (load_maps(map_data, nr_maps, fixup_map)) ++ goto done; ++ map_data_count = nr_maps; ++ ++ processed_sec[maps_shndx] = true; ++ } ++ ++ /* process all relo sections, and rewrite bpf insns for maps */ ++ for (i = 1; i < ehdr.e_shnum; i++) { ++ if (processed_sec[i]) ++ continue; ++ ++ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) ++ continue; ++ ++ if (shdr.sh_type == SHT_REL) { ++ struct bpf_insn *insns; ++ ++ /* locate prog sec that need map fixup (relocations) */ ++ if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, ++ &shdr_prog, &data_prog)) ++ continue; ++ ++ if (shdr_prog.sh_type != SHT_PROGBITS || ++ !(shdr_prog.sh_flags & SHF_EXECINSTR)) ++ continue; ++ ++ insns = (struct bpf_insn *) data_prog->d_buf; ++ processed_sec[i] = true; /* relo section */ ++ ++ if (parse_relo_and_apply(data, symbols, &shdr, insns, ++ map_data, nr_maps)) ++ continue; ++ } ++ } ++ ++ /* load programs */ ++ for (i = 1; i < ehdr.e_shnum; i++) { ++ ++ if (processed_sec[i]) ++ continue; ++ ++ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) ++ continue; ++ ++ if (memcmp(shname, "kprobe/", 7) == 0 || ++ memcmp(shname, "kretprobe/", 10) == 0 || ++ memcmp(shname, "tracepoint/", 11) == 0 || ++ memcmp(shname, "raw_tracepoint/", 15) == 0 || ++ memcmp(shname, "xdp", 3) == 0 || ++ memcmp(shname, "perf_event", 10) == 0 || ++ memcmp(shname, "socket", 6) == 0 || ++ memcmp(shname, "cgroup/", 7) == 0 || ++ memcmp(shname, "sockops", 7) == 0 || ++ memcmp(shname, "sk_skb", 6) == 0 || ++ memcmp(shname, "sk_msg", 6) == 0) { ++ ret = load_and_attach(shname, data->d_buf, ++ data->d_size); ++ if (ret != 0) ++ goto done; ++ } ++ } ++ ++done: ++ close(fd); ++ return ret; ++} ++ ++int load_bpf_file(char *path) ++{ ++ return do_load_bpf_file(path, NULL); ++} ++ ++int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) ++{ ++ return do_load_bpf_file(path, fixup_map); ++} ++ ++void read_trace_pipe(void) ++{ ++ int trace_fd; ++ ++ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); ++ if (trace_fd < 0) ++ return; ++ ++ while (1) { ++ static char buf[4096]; ++ ssize_t sz; ++ ++ sz = read(trace_fd, buf, sizeof(buf) - 1); ++ if (sz > 0) { ++ buf[sz] = 0; ++ puts(buf); ++ } ++ } ++} +diff --git a/src/c/ebpf_collector/ebpf_collector.bpf.c b/src/c/ebpf_collector/ebpf_collector.bpf.c +new file mode 100644 +index 0000000..28cdde2 +--- /dev/null ++++ b/src/c/ebpf_collector/ebpf_collector.bpf.c +@@ -0,0 +1,1408 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. 
++ * Description: ebpf collector program ++ * Author: Zhang Nan ++ * Create: 2024-09-27 ++ */ ++#define KBUILD_MODNAME "foo" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "bpf_helpers.h" ++#include "ebpf_collector.h" ++ ++#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;}) ++ ++struct bpf_map_def SEC("maps") blk_map = { ++ .type = BPF_MAP_TYPE_HASH, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct io_counter), ++ .max_entries = 10000, ++}; ++ ++struct bpf_map_def SEC("maps") blk_res = { ++ .type = BPF_MAP_TYPE_ARRAY, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct stage_data), ++ .max_entries = 128, ++}; ++ ++struct bpf_map_def SEC("maps") bio_map = { ++ .type = BPF_MAP_TYPE_HASH, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct io_counter), ++ .max_entries = 10000, ++}; ++ ++struct bpf_map_def SEC("maps") bio_res = { ++ .type = BPF_MAP_TYPE_ARRAY, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct stage_data), ++ .max_entries = 128, ++}; ++ ++struct bpf_map_def SEC("maps") wbt_map = { ++ .type = BPF_MAP_TYPE_HASH, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct io_counter), ++ .max_entries = 10000, ++}; ++ ++struct bpf_map_def SEC("maps") wbt_res = { ++ .type = BPF_MAP_TYPE_ARRAY, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct stage_data), ++ .max_entries = 128, ++}; ++ ++struct bpf_map_def SEC("maps") wbt_args = { ++ .type = BPF_MAP_TYPE_HASH, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(u64), ++ .max_entries = 1000, ++}; ++ ++struct bpf_map_def SEC("maps") tag_map = { ++ .type = BPF_MAP_TYPE_HASH, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct io_counter), ++ .max_entries = 10000, ++}; ++ ++struct bpf_map_def SEC("maps") tag_res = { ++ .type = BPF_MAP_TYPE_ARRAY, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(struct stage_data), ++ .max_entries = 128, ++}; ++ ++struct bpf_map_def SEC("maps") tag_args = { ++ .type = BPF_MAP_TYPE_HASH, ++ .key_size = sizeof(u32), ++ .value_size = sizeof(u64), ++ .max_entries = 1000, ++}; ++ ++struct blk_mq_alloc_data { ++ /* input parameter */ ++ struct request_queue *q; ++ blk_mq_req_flags_t flags; ++ unsigned int shallow_depth; ++ ++ /* input & output parameter */ ++ struct blk_mq_ctx *ctx; ++ struct blk_mq_hw_ctx *hctx; ++}; ++ ++static __always_inline void blk_fill_rwbs(char *rwbs, unsigned int op) ++{ ++ switch (op & REQ_OP_MASK) { ++ case REQ_OP_WRITE: ++ case REQ_OP_WRITE_SAME: ++ rwbs[0] = 'W'; ++ break; ++ case REQ_OP_DISCARD: ++ rwbs[0] = 'D'; ++ break; ++ case REQ_OP_SECURE_ERASE: ++ rwbs[0] = 'E'; ++ break; ++ case REQ_OP_FLUSH: ++ rwbs[0] = 'F'; ++ break; ++ case REQ_OP_READ: ++ rwbs[0] = 'R'; ++ break; ++ default: ++ rwbs[0] = 'N'; ++ } ++ ++ if (op & REQ_FUA) { ++ rwbs[1] = 'F'; ++ } else { ++ rwbs[1] = '#'; ++ } ++ if (op & REQ_RAHEAD) { ++ rwbs[2] = 'A'; ++ } else { ++ rwbs[2] = '#'; ++ } ++ if (op & REQ_SYNC) { ++ rwbs[3] = 'S'; ++ } else { ++ rwbs[3] = '#'; ++ } ++ if (op & REQ_META) { ++ rwbs[4] = 'M'; ++ } else { ++ rwbs[4] = '#'; ++ } ++} ++ ++void update_new_data_in_start(struct stage_data *new_data, struct update_params *params) { ++ blk_fill_rwbs(new_data->io_type, params->cmd_flags); ++ if (new_data->bucket[params->update_bucket].start_range == params->curr_start_range){ ++ new_data->bucket[params->update_bucket].io_count += 1; ++ } else { ++ new_data->bucket[MAX_BUCKETS].io_count += new_data->bucket[params->update_bucket].io_count; 
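++ /* start_range changed: the old count for this bucket was folded into the overflow slot bucket[MAX_BUCKETS] above, so restart the count at 1 and record the new range below */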
++ new_data->bucket[params->update_bucket].io_count = 1; ++ new_data->bucket[params->update_bucket].start_range = params->curr_start_range; ++ } ++} ++ ++void update_curr_data_in_start(struct stage_data *curr_data, struct update_params *params) { ++ if (curr_data && params) { ++ curr_data->start_count += 1; ++ curr_data->major = params->major; ++ curr_data->first_minor = params->first_minor; ++ blk_fill_rwbs(curr_data->io_type, params->cmd_flags); ++ if (curr_data->bucket[params->update_bucket].start_range == params->curr_start_range) { ++ curr_data->bucket[params->update_bucket].io_count += 1; ++ } else { ++ curr_data->bucket[MAX_BUCKETS].io_count += curr_data->bucket[params->update_bucket].io_count; ++ curr_data->bucket[params->update_bucket].io_count = 1; ++ } ++ curr_data->bucket[params->update_bucket].start_range = params->curr_start_range; ++ } ++} ++ ++void update_new_data_in_finish(struct stage_data *new_data, struct update_params *params) { ++ blk_fill_rwbs(new_data->io_type, params->cmd_flags); ++ if (new_data->bucket[params->update_bucket].start_range == params->curr_start_range){ ++ new_data->bucket[params->update_bucket].io_count = (new_data->bucket[params->update_bucket].io_count > 1) ? new_data->bucket[params->update_bucket].io_count - 1 : 0; ++ } else { ++ new_data->bucket[MAX_BUCKETS].io_count = (new_data->bucket[MAX_BUCKETS].io_count > 1) ? new_data->bucket[MAX_BUCKETS].io_count - 1 : 0; ++ } ++} ++ ++void update_curr_data_in_finish(struct stage_data *curr_data, struct update_params *params, u64 duration) { ++ if (curr_data && params) { ++ curr_data->finish_count += 1; ++ curr_data->major = params->major; ++ curr_data->first_minor = params->first_minor; ++ blk_fill_rwbs(curr_data->io_type, params->cmd_flags); ++ if (duration > DURATION_THRESHOLD) { ++ curr_data->finish_over_time += 1; ++ } ++ } ++} ++ ++static void init_io_counter(struct io_counter *counterp, int major, int first_minor) { ++ if (counterp) { ++ counterp->start_time = bpf_ktime_get_ns(); ++ counterp->major = major; ++ counterp->first_minor = first_minor; ++ } ++} ++ ++ ++u32 find_matching_tag_1_keys(int major, int minor) { ++ u32 key = 0; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 1; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&tag_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 2; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&tag_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_tag_2_keys(int major, int minor) { ++ u32 key = 3; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 4; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&tag_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 5; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&tag_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 
find_matching_tag_3_keys(int major, int minor) { ++ u32 key = 6; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 7; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&tag_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 8; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&tag_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_tag_4_keys(int major, int minor) { ++ u32 key = 9; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 10; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&tag_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 11; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&tag_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_tag_5_keys(int major, int minor) { ++ u32 key = 12; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 13; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&tag_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 14; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&tag_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_blk_1_keys(int major, int minor) { ++ u32 key = 0; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 1; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&blk_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 2; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&blk_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_blk_2_keys(int major, int minor) { ++ u32 key = 3; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 4; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&blk_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 5; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&blk_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && 
curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_blk_3_keys(int major, int minor) { ++ u32 key = 6; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 7; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&blk_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 8; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&blk_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_blk_4_keys(int major, int minor) { ++ u32 key = 9; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 10; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&blk_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 11; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&blk_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_blk_5_keys(int major, int minor) { ++ u32 key = 12; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 13; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&blk_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 14; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&blk_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_bio_1_keys(int major, int minor) { ++ u32 key = 0; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 1; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&bio_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 2; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&bio_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_bio_2_keys(int major, int minor) { ++ u32 key = 3; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 4; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&bio_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 5; ++ struct stage_data *curr_data_3 = 
bpf_map_lookup_elem(&bio_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_bio_3_keys(int major, int minor) { ++ u32 key = 6; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 7; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&bio_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 8; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&bio_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_bio_4_keys(int major, int minor) { ++ u32 key = 9; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 10; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&bio_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 11; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&bio_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_bio_5_keys(int major, int minor) { ++ u32 key = 12; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 13; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&bio_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 14; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&bio_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_wbt_1_keys(int major, int minor) { ++ u32 key = 0; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 1; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&wbt_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 2; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&wbt_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_wbt_2_keys(int major, int minor) { ++ u32 key = 3; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 4; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&wbt_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && 
curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 5; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&wbt_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_wbt_3_keys(int major, int minor) { ++ u32 key = 6; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 7; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&wbt_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 8; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&wbt_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_wbt_4_keys(int major, int minor) { ++ u32 key = 9; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 10; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&wbt_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 11; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&wbt_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++u32 find_matching_wbt_5_keys(int major, int minor) { ++ u32 key = 12; ++ struct stage_data *curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ ++ if (curr_data != NULL && curr_data->major == major && curr_data->first_minor == minor) { ++ return key; ++ } ++ ++ u32 key_2 = 13; ++ struct stage_data *curr_data_2 = bpf_map_lookup_elem(&wbt_res, &key_2); ++ ++ if (curr_data_2 != NULL && curr_data_2->major == major && curr_data_2->first_minor == minor) { ++ return key_2; ++ } ++ ++ u32 key_3 = 14; ++ struct stage_data *curr_data_3 = bpf_map_lookup_elem(&wbt_res, &key_3); ++ ++ if (curr_data_3 != NULL && curr_data_3->major == major && curr_data_3->first_minor == minor) { ++ return key_3; ++ } ++ ++ return MAP_SIZE + 1; ++} ++ ++SEC("kprobe/blk_mq_start_request") ++int kprobe_blk_mq_start_request(struct pt_regs *regs) ++{ ++ struct request *rq = (struct request *)PT_REGS_PARM1(regs); ++ struct gendisk *curr_rq_disk = _(rq->rq_disk); ++ int major = _(curr_rq_disk->major); ++ int first_minor = _(curr_rq_disk->first_minor); ++ unsigned int cmd_flags = _(rq->cmd_flags); ++ ++ struct io_counter *counterp, zero = {}; ++ ++ u32 key = find_matching_blk_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ init_io_counter(&zero, major, first_minor); ++ ++ counterp = bpf_map_lookup_elem(&blk_map, &rq); ++ if (counterp || major == 0) ++ return 0; ++ long err = bpf_map_update_elem(&blk_map, &rq, &zero, 
BPF_NOEXIST); ++ if (err) ++ return 0; ++ ++ u64 curr_start_range = zero.start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ if (!curr_data) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 0, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_start(&new_data, ¶ms); ++ bpf_map_update_elem(&blk_res, &key, &new_data, 0); ++ } else { ++ update_curr_data_in_start(curr_data, ¶ms); ++ } ++ ++ return 0; ++} ++ ++SEC("kprobe/blk_mq_free_request") ++int kprobe_blk_mq_free_request(struct pt_regs *regs) ++{ ++ struct request *rq = (struct request *)PT_REGS_PARM1(regs); ++ struct gendisk *curr_rq_disk = _(rq->rq_disk); ++ int major = _(curr_rq_disk->major); ++ int first_minor = _(curr_rq_disk->first_minor); ++ unsigned int cmd_flags = _(rq->cmd_flags); ++ ++ struct io_counter *counterp; ++ u32 key = find_matching_blk_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_blk_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ counterp = bpf_map_lookup_elem(&blk_map, &rq); ++ ++ if (!counterp) { ++ return 0; ++ } ++ ++ u64 duration = bpf_ktime_get_ns() - counterp->start_time; ++ u64 curr_start_range = counterp->start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&blk_res, &key); ++ if (curr_data == NULL && duration > DURATION_THRESHOLD) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 1, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&blk_res, &key, &new_data, 0); ++ } else if (curr_data == NULL) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&blk_res, &key, &new_data, 0); ++ } else { ++ if (curr_data->bucket[update_bucket].start_range == curr_start_range) { ++ curr_data->bucket[update_bucket].io_count = (curr_data->bucket[update_bucket].io_count > 1) ? 
curr_data->bucket[update_bucket].io_count - 1 : 0; ++ } else { ++ curr_data->bucket[MAX_BUCKETS].io_count = (curr_data->bucket[MAX_BUCKETS].io_count > 1) ? curr_data->bucket[MAX_BUCKETS].io_count - 1 : 0; ++ ++ } ++ curr_data->duration += duration; ++ update_curr_data_in_finish(curr_data, ¶ms, &duration); ++ } ++ ++ bpf_map_delete_elem(&blk_map, &rq); ++ return 0; ++} ++ ++SEC("kprobe/blk_mq_make_request") ++int kprobe_blk_mq_make_request(struct pt_regs *regs) ++{ ++ struct bio *bio = (struct bio *)PT_REGS_PARM2(regs); ++ struct gendisk *curr_rq_disk = _(bio->bi_disk); ++ int major = _(curr_rq_disk->major); ++ int first_minor = _(curr_rq_disk->first_minor); ++ unsigned int cmd_flags = _(bio->bi_opf); ++ ++ struct io_counter *counterp, zero = {}; ++ u32 key = find_matching_bio_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ init_io_counter(&zero, major, first_minor); ++ ++ counterp = bpf_map_lookup_elem(&bio_map, &bio); ++ if (counterp || major == 0) ++ return 0; ++ ++ long err = bpf_map_update_elem(&bio_map, &bio, &zero, BPF_NOEXIST); ++ if (err && err != -EEXIST) ++ return 0; ++ ++ u64 curr_start_range = zero.start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ if (curr_data == NULL) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 0, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_start(&new_data, ¶ms); ++ bpf_map_update_elem(&bio_res, &key, &new_data, 0); ++ } else { ++ update_curr_data_in_start(curr_data, ¶ms); ++ } ++ ++ return 0; ++} ++ ++SEC("kprobe/bio_endio") ++int kprobe_bio_endio(struct pt_regs *regs) ++{ ++ struct bio *bio = (struct bio *)PT_REGS_PARM1(regs); ++ struct gendisk *curr_rq_disk = _(bio->bi_disk); ++ int major = _(curr_rq_disk->major); ++ int first_minor = _(curr_rq_disk->first_minor); ++ unsigned int cmd_flags = _(bio->bi_opf); ++ ++ struct io_counter *counterp; ++ void *delete_map = NULL; ++ u32 key = find_matching_bio_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_bio_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ counterp = bpf_map_lookup_elem(&bio_map, &bio); ++ ++ if (!counterp) { ++ return 0; ++ } ++ ++ delete_map = &bio_map; ++ ++ u64 duration = bpf_ktime_get_ns() - counterp->start_time; ++ u64 curr_start_range = counterp->start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct 
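++    /*
++     * Window accounting: start times are quantised to
++     * start_time / THRESHOLD / MAX_BUCKETS, and with MAX_BUCKETS == 1 the
++     * live bucket index is always 0, while bucket[MAX_BUCKETS] is the
++     * carry-over slot that update_curr_data_in_start shifts stale counts
++     * into. The finish path below decrements whichever slot the request was
++     * counted in, so the carry-over slot approximates I/O still outstanding
++     * from earlier windows -- the "io_dump" column user space reads.
++     */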
update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&bio_res, &key); ++ if (curr_data == NULL && duration > DURATION_THRESHOLD) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 1, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&bio_res, &key, &new_data, 0); ++ } else if (curr_data == NULL) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&bio_res, &key, &new_data, 0); ++ } else { ++ if (curr_data->bucket[update_bucket].start_range == curr_start_range) { ++ curr_data->bucket[update_bucket].io_count = (curr_data->bucket[update_bucket].io_count > 1) ? curr_data->bucket[update_bucket].io_count - 1 : 0; ++ } else { ++ curr_data->bucket[MAX_BUCKETS].io_count = (curr_data->bucket[MAX_BUCKETS].io_count > 1) ? curr_data->bucket[MAX_BUCKETS].io_count - 1 : 0; ++ ++ } ++ curr_data->duration += duration; ++ update_curr_data_in_finish(curr_data, ¶ms, &duration); ++ } ++ ++ bpf_map_delete_elem(delete_map, &bio); ++ return 0; ++} ++ ++SEC("kprobe/wbt_wait") ++int kprobe_wbt_wait(struct pt_regs *regs) ++{ ++ u64 wbtkey = bpf_get_current_task(); ++ u64 value = (u64)PT_REGS_PARM2(regs); ++ (void)bpf_map_update_elem(&wbt_args, &wbtkey, &value, BPF_ANY); ++ struct bio *bio = (struct bio *)value; ++ struct gendisk *curr_rq_disk = _(bio->bi_disk); ++ int major = _(curr_rq_disk->major); ++ int first_minor = _(curr_rq_disk->first_minor); ++ unsigned int cmd_flags = _(bio->bi_opf); ++ ++ struct io_counter *counterp, zero = {}; ++ u32 key = find_matching_wbt_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ init_io_counter(&zero, major, first_minor); ++ ++ counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey); ++ ++ if (counterp || major == 0) ++ return 0; ++ long err = bpf_map_update_elem(&wbt_map, &wbtkey, &zero, BPF_NOEXIST); ++ if (err) ++ return 0; ++ ++ u64 curr_start_range = zero.start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ if (!curr_data) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 0, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ 
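++            /* bucket[] holds MAX_BUCKETS + 1 == 2 slots: the live window at
++               index 0 and the carry-over slot at index MAX_BUCKETS, so the
++               initializer below zeroes both. */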
.bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_start(&new_data, ¶ms); ++ bpf_map_update_elem(&wbt_res, &key, &new_data, 0); ++ } else { ++ update_curr_data_in_start(curr_data, ¶ms); ++ } ++ ++ return 0; ++} ++ ++SEC("kretprobe/wbt_wait") ++int kretprobe_wbt_wait(struct pt_regs *regs) ++{ ++ struct bio *bio = NULL; ++ u64 *wbtargs = NULL; ++ u64 wbtkey = bpf_get_current_task(); ++ wbtargs = (u64 *)bpf_map_lookup_elem(&wbt_args, &wbtkey); ++ if (wbtargs == NULL) { ++ bpf_map_delete_elem(&wbt_args, &wbtkey); ++ return 0; ++ } ++ bio = (struct bio *)(*wbtargs); ++ struct gendisk *curr_rq_disk = _(bio->bi_disk); ++ int major = _(curr_rq_disk->major); ++ int first_minor = _(curr_rq_disk->first_minor); ++ unsigned int cmd_flags = _(bio->bi_opf); ++ ++ struct io_counter *counterp; ++ u32 key = find_matching_wbt_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_wbt_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ counterp = bpf_map_lookup_elem(&wbt_map, &wbtkey); ++ ++ if (!counterp) ++ return 0; ++ ++ u64 duration = bpf_ktime_get_ns() - counterp->start_time; ++ u64 curr_start_range = counterp->start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&wbt_res, &key); ++ if (curr_data == NULL && duration > DURATION_THRESHOLD) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 1, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&wbt_res, &key, &new_data, 0); ++ } else if (curr_data == NULL) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 0, ++ .duration = 0, ++ .io_type = "", ++ .major = major, ++ .first_minor = first_minor, ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&wbt_res, &key, &new_data, 0); ++ } else { ++ if (curr_data->bucket[update_bucket].start_range == curr_start_range) { ++ curr_data->bucket[update_bucket].io_count = (curr_data->bucket[update_bucket].io_count > 1) ? curr_data->bucket[update_bucket].io_count - 1 : 0; ++ } else { ++ curr_data->bucket[MAX_BUCKETS].io_count = (curr_data->bucket[MAX_BUCKETS].io_count > 1) ? 
curr_data->bucket[MAX_BUCKETS].io_count - 1 : 0; ++ ++ } ++ curr_data->duration += duration; ++ update_curr_data_in_finish(curr_data, ¶ms, &duration); ++ } ++ ++ bpf_map_delete_elem(&wbt_map, &wbtkey); ++ bpf_map_delete_elem(&wbt_args, &wbtkey); ++ return 0; ++} ++ ++SEC("kprobe/blk_mq_get_tag") ++int kprobe_blk_mq_get_tag(struct pt_regs *regs) ++{ ++ u64 tagkey = bpf_get_current_task(); ++ u64 value = (u64)PT_REGS_PARM1(regs); ++ (void)bpf_map_update_elem(&tag_args, &tagkey, &value, BPF_ANY); ++ struct blk_mq_alloc_data *bd= (struct blk_mq_alloc_data *)value; ++ struct request_queue *q = _(bd->q); ++ struct backing_dev_info *backing_dev_info = _(q->backing_dev_info); ++ struct device *owner = _(backing_dev_info->owner); ++ dev_t devt = _(owner->devt); ++ int major = MAJOR(devt); ++ int first_minor = MINOR(devt); ++ unsigned int cmd_flags = 0; ++ ++ struct io_counter *counterp, zero = {}; ++ u32 key = find_matching_tag_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_4_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ init_io_counter(&zero, major, first_minor); ++ ++ counterp = bpf_map_lookup_elem(&tag_map, &tagkey); ++ if (counterp || major == 0) ++ return 0; ++ long err = bpf_map_update_elem(&tag_map, &tagkey, &zero, BPF_NOEXIST); ++ if (err) ++ return 0; ++ ++ u64 curr_start_range = zero.start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ if (!curr_data) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 0, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_start(&new_data, ¶ms); ++ bpf_map_update_elem(&tag_res, &key, &new_data, 0); ++ } else { ++ update_curr_data_in_start(curr_data, ¶ms); ++ } ++ ++ return 0; ++} ++ ++SEC("kretprobe/blk_mq_get_tag") ++int kretprobe_blk_mq_get_tag(struct pt_regs *regs) ++{ ++ u64 tagkey = bpf_get_current_task(); ++ u64 *tagargs = NULL; ++ struct blk_mq_alloc_data *bd = NULL; ++ ++ tagargs = (u64 *)bpf_map_lookup_elem(&tag_args, &tagkey); ++ if (tagargs == NULL) { ++ bpf_map_delete_elem(&tag_args, &tagkey); ++ return 0; ++ } ++ bd = (struct blk_mq_alloc_data *)(*tagargs); ++ struct request_queue *q = _(bd->q); ++ struct backing_dev_info *backing_dev_info = _(q->backing_dev_info); ++ struct device *owner = _(backing_dev_info->owner); ++ dev_t devt = _(owner->devt); ++ int major = MAJOR(devt); ++ int first_minor = MINOR(devt); ++ unsigned int cmd_flags = 0; ++ ++ struct io_counter *counterp; ++ u32 key = find_matching_tag_1_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_2_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_3_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ key = find_matching_tag_4_keys(major, first_minor); ++ if (key >= 
MAP_SIZE){ ++ key = find_matching_tag_5_keys(major, first_minor); ++ if (key >= MAP_SIZE){ ++ return 0; ++ } ++ } ++ } ++ } ++ } ++ ++ counterp = bpf_map_lookup_elem(&tag_map, &tagkey); ++ ++ if (!counterp) ++ return 0; ++ ++ u64 duration = bpf_ktime_get_ns() - counterp->start_time; ++ u64 curr_start_range = counterp->start_time / THRESHOLD / MAX_BUCKETS; ++ u64 update_bucket = curr_start_range % MAX_BUCKETS; ++ ++ struct update_params params = { ++ .major = major, ++ .first_minor = first_minor, ++ .cmd_flags = cmd_flags, ++ .update_bucket = update_bucket, ++ .curr_start_range = curr_start_range, ++ }; ++ ++ struct stage_data *curr_data; ++ curr_data = bpf_map_lookup_elem(&tag_res, &key); ++ if (curr_data == NULL && duration > DURATION_THRESHOLD) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 1, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&tag_res, &key, &new_data, 0); ++ } else if (curr_data == NULL) { ++ struct stage_data new_data = { ++ .start_count = 1, ++ .finish_count = 1, ++ .finish_over_time = 0, ++ .duration = 0, ++ .major = major, ++ .first_minor = first_minor, ++ .io_type = "", ++ .bucket = { ++ [0] = {.start_range = 0, .io_count = 0}, ++ [1] = {.start_range = 0, .io_count = 0}, ++ }, ++ }; ++ update_new_data_in_finish(&new_data, ¶ms); ++ bpf_map_update_elem(&tag_res, &key, &new_data, 0); ++ } else { ++ if (curr_data->bucket[update_bucket].start_range == curr_start_range) { ++ curr_data->bucket[update_bucket].io_count = (curr_data->bucket[update_bucket].io_count > 1) ? curr_data->bucket[update_bucket].io_count - 1 : 0; ++ } else { ++ curr_data->bucket[MAX_BUCKETS].io_count = (curr_data->bucket[MAX_BUCKETS].io_count > 1) ? curr_data->bucket[MAX_BUCKETS].io_count - 1 : 0; ++ ++ } ++ curr_data->duration += duration; ++ update_curr_data_in_finish(curr_data, ¶ms, &duration); ++ } ++ bpf_map_delete_elem(&tag_map, &tagkey); ++ bpf_map_delete_elem(&tag_args, &tagkey); ++ return 0; ++} ++ ++char LICENSE[] SEC("license") = "Dual BSD/GPL"; ++u32 _version SEC("version") = LINUX_VERSION_CODE; ++ +diff --git a/src/c/ebpf_collector/ebpf_collector.c b/src/c/ebpf_collector/ebpf_collector.c +new file mode 100644 +index 0000000..88b9cb6 +--- /dev/null ++++ b/src/c/ebpf_collector/ebpf_collector.c +@@ -0,0 +1,274 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. 
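++ * Output: one line per block device and stage on stdout every second,
++ *     stage finish_count duration io_dump io_type device
++ * parsed field-by-field by sentryCollector/collect_io.py.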
++ * Description: ebpf collector program
++ * Author: Zhang Nan
++ * Create: 2024-09-27
++ */
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <stdbool.h>
++#include <errno.h>
++#include <signal.h>
++#include <unistd.h>
++#include <argp.h>
++#include <dirent.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <sys/sysmacros.h>
++#include <sys/resource.h>
++#include <linux/bpf.h>
++#include <bpf/bpf.h>
++#include <bpf/libbpf.h>
++#include <bpf_load.h>
++#include "ebpf_collector.h"
++
++#define BLK_MAP (map_fd[0])
++#define BLK_RES (map_fd[1])
++#define BIO_MAP (map_fd[2])
++#define BIO_RES (map_fd[3])
++#define WBT_MAP (map_fd[4])
++#define WBT_RES (map_fd[5])
++#define TAG_MAP (map_fd[7])
++#define TAG_RES (map_fd[8])
++#define BPF_FILE "/usr/lib64/ebpf_collector.bpf.o"
++
++typedef struct {
++    int major;
++    int minor;
++} DeviceInfo;
++
++static volatile bool exiting;
++
++const char argp_program_doc[] =
++"Show block device I/O pattern.\n"
++"\n"
++"USAGE: ebpf_collector [--help]\n"
++"\n"
++"EXAMPLES:\n"
++"    ebpf_collector          # show block I/O pattern\n";
++
++static const struct argp_option opts[] = {
++    { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
++    {},
++};
++
++static error_t parse_arg(int key, char *arg, struct argp_state *state) {
++    switch (key) {
++    case 'h':
++        argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
++        break;
++    default:
++        return ARGP_ERR_UNKNOWN;
++    }
++    return 0;
++}
++
++static void sig_handler(int sig)
++{
++    exiting = true;
++}
++
++char* extract_device_name(const char *path) {
++    const char *dev_dir = "/dev/";
++    char *name = strrchr(path, '/') + 1;
++    if (strncmp(dev_dir, path, strlen(dev_dir)) == 0) {
++        return strdup(name);
++    }
++    return NULL;
++}
++
++char* find_device_name(dev_t dev) {
++    DIR *dir;
++    struct dirent *entry;
++    struct stat sb;
++    char *device_name = NULL;
++    char path[1024];
++
++    dir = opendir("/dev");
++    if (dir == NULL) {
++        perror("Failed to open /dev");
++        return NULL;
++    }
++
++    while ((entry = readdir(dir)) != NULL) {
++        snprintf(path, sizeof(path), "/dev/%s", entry->d_name);
++
++        if (entry->d_type == DT_DIR || entry->d_type == DT_LNK) {
++            continue;
++        }
++
++        if (stat(path, &sb) == -1) {
++            continue;
++        }
++
++        if (major(sb.st_rdev) == major(dev) && minor(sb.st_rdev) == minor(dev)) {
++            device_name = extract_device_name(path);
++            break;
++        }
++    }
++
++    closedir(dir);
++    return device_name;
++}
++
++static int print_map_res(int map_res, char *stage, int map_size)
++{
++    int err;
++    struct stage_data counter;
++    int key = 0;
++
++    for (key = 0; key < map_size; key++) {
++        err = bpf_map_lookup_elem(map_res, &key, &counter);
++        if (err < 0) {
++            fprintf(stderr, "failed to lookup %s map_res: %d\n", stage, err);
++            return -1;
++        }
++
++        size_t length = strlen(counter.io_type);
++        char io_type;
++        if (length > 0) {
++            io_type = counter.io_type[0];
++        } else {
++            io_type = '\0';
++        }
++        int major = counter.major;
++        int first_minor = counter.first_minor;
++        dev_t dev = makedev(major, first_minor);
++        char *device_name = find_device_name(dev);
++        if (device_name && io_type) {
++            printf("%-7s %10llu %10llu %u %c %s\n",
++                stage,
++                counter.finish_count,
++                counter.duration,
++                counter.bucket[MAX_BUCKETS].io_count,
++                io_type,
++                device_name
++            );
++            fflush(stdout);
++        }
++        free(device_name);
++    }
++
++    return 0;
++}
++
++int init_map(int map_fd, const char *map_name, int map_size, DeviceInfo *devices) {
++    struct stage_data init_data = {0};
++    memset(init_data.io_type, 0, sizeof(init_data.io_type));
++    memset(init_data.bucket, 0, sizeof(init_data.bucket));
++
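++    /* Seed one result-map slot per block device discovered by main() so the
++       BPF-side find_matching_*_keys helpers can resolve a device's slot by
++       major/first_minor. */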
++    for (int i = 0; i < map_size; i++) {
++        init_data.major = devices[i].major;
++        init_data.first_minor = devices[i].minor;
++        if (bpf_map_update_elem(map_fd, &i, &init_data, BPF_ANY) != 0) {
++            printf("Failed to initialize map %s at index %d\n", map_name, i);
++            return 1;
++        }
++    }
++
++    return 0;
++}
++
++int main(int argc, char **argv) {
++    static const struct argp argp = {
++        .options = opts,
++        .parser = parse_arg,
++        .doc = argp_program_doc,
++    };
++    int err;
++    char filename[256];
++    DIR *dir;
++    struct dirent *entry;
++    char path[1024];
++    DeviceInfo devices[MAP_SIZE];
++    int device_count = 0;
++    struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
++    setrlimit(RLIMIT_MEMLOCK, &r);
++
++    err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
++    if (err)
++        return err;
++
++    snprintf(filename, sizeof(filename), BPF_FILE);
++
++    if (load_bpf_file(filename)) {
++        return 1;
++    }
++
++    signal(SIGINT, sig_handler);
++
++    dir = opendir("/dev");
++    if (dir == NULL) {
++        perror("Failed to open /dev directory");
++        return EXIT_FAILURE;
++    }
++
++    while ((entry = readdir(dir)) != NULL) {
++        if (entry->d_type == DT_BLK) {
++            snprintf(path, sizeof(path), "/dev/%s", entry->d_name);
++            struct stat statbuf;
++            if (lstat(path, &statbuf) == 0) {
++                if (S_ISBLK(statbuf.st_mode)) {
++                    devices[device_count].major = major(statbuf.st_rdev);
++                    devices[device_count].minor = minor(statbuf.st_rdev);
++                    device_count++;
++                    if (device_count >= MAP_SIZE) {
++                        break;
++                    }
++                }
++            }
++        }
++    }
++
++    closedir(dir);
++
++    if (init_map(BLK_RES, "blk_res_map", device_count, devices) != 0) {
++        return 1;
++    }
++    if (init_map(BIO_RES, "bio_res_map", device_count, devices) != 0) {
++        return 1;
++    }
++    if (init_map(WBT_RES, "wbt_res_map", device_count, devices) != 0) {
++        return 1;
++    }
++    if (init_map(TAG_RES, "tag_res_map", device_count, devices) != 0) {
++        return 1;
++    }
++
++    for (;;) {
++
++        sleep(1);
++
++        err = print_map_res(BLK_RES, "rq_driver", device_count);
++        if (err)
++            break;
++
++        err = print_map_res(BIO_RES, "bio", device_count);
++        if (err)
++            break;
++
++        err = print_map_res(TAG_RES, "gettag", device_count);
++        if (err)
++            break;
++
++        err = print_map_res(WBT_RES, "wbt", device_count);
++        if (err)
++            break;
++
++        if (exiting)
++            break;
++    }
++
++    return -err;
++}
+diff --git a/src/c/ebpf_collector/ebpf_collector.h b/src/c/ebpf_collector/ebpf_collector.h
+new file mode 100644
+index 0000000..1ae33de
+--- /dev/null
++++ b/src/c/ebpf_collector/ebpf_collector.h
+@@ -0,0 +1,77 @@
++/*
++ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
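++ * Notes: DURATION_THRESHOLD is 500 ms expressed in nanoseconds (the
++ * over-time cut-off), THRESHOLD and MAX_BUCKETS quantise bpf_ktime_get_ns()
++ * start times into windows, and bucket[MAX_BUCKETS] is the carry-over slot.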
++ * Description: ebpf collector program ++ * Author: Zhang Nan ++ * Create: 2024-09-27 ++ */ ++#ifndef __EBPFCOLLECTOR_H ++#define __EBPFCOLLECTOR_H ++ ++typedef long long unsigned int u64; ++typedef unsigned int u32; ++ ++#define MAX_BUCKETS 1 ++#define THRESHOLD 1000 ++#define DURATION_THRESHOLD 500000000 ++ ++#define RWBS_LEN 8 ++ ++#define REQ_OP_BITS 8 ++#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) ++#define REQ_FUA (1ULL << __REQ_FUA) ++#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) ++#define REQ_SYNC (1ULL << __REQ_SYNC) ++#define REQ_META (1ULL << __REQ_META) ++#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) ++#define REQ_OP_READ 0 ++#define REQ_OP_WRITE 1 ++#define REQ_OP_FLUSH 2 ++#define REQ_OP_DISCARD 3 ++#define REQ_OP_SECURE_ERASE 5 ++#define REQ_OP_WRITE_SAME 7 ++#define MAP_SIZE 128 ++ ++enum stage_type { ++ BIO=0, ++ WBT, ++ GET_TAG, ++ DEADLINE, ++ BFQ, ++ KYBER, ++ RQ_DRIVER, ++ MAX_STAGE_TYPE, ++}; ++ ++struct time_bucket { ++ u64 start_range; ++ u32 io_count; ++}; ++ ++struct stage_data { ++ u64 start_count; ++ u64 finish_count; ++ u64 finish_over_time; ++ u64 duration; ++ int major; ++ int first_minor; ++ char io_type[RWBS_LEN]; ++ struct time_bucket bucket[MAX_BUCKETS+1]; ++}; ++ ++struct io_counter { ++ u64 duration; ++ u64 start_time; ++ u32 isend; ++ int major; ++ int first_minor; ++}; ++ ++struct update_params { ++ int major; ++ int first_minor; ++ unsigned int cmd_flags; ++ u64 update_bucket; ++ u64 curr_start_range; ++}; ++ ++#endif /* __EBPFCOLLECTOR_H */ +diff --git a/src/python/sentryCollector/collect_io.py b/src/python/sentryCollector/collect_io.py +index 019d174..e45947a 100644 +--- a/src/python/sentryCollector/collect_io.py ++++ b/src/python/sentryCollector/collect_io.py +@@ -16,12 +16,18 @@ import os + import time + import logging + import threading ++import subprocess ++from typing import Union + + from .collect_config import CollectConfig + + Io_Category = ["read", "write", "flush", "discard"] + IO_GLOBAL_DATA = {} + IO_CONFIG_DATA = [] ++EBPF_GLOBAL_DATA = [] ++EBPF_PROCESS = None ++EBPF_STAGE_LIST = ["wbt", "rq_driver", "bio", "gettag"] ++EBPF_SUPPORT_VERSION = ["4.19.90"] + + class IoStatus(): + TOTAL = 0 +@@ -41,6 +47,8 @@ class CollectIo(): + self.disk_map_stage = {} + self.window_value = {} + ++ self.ebpf_base_path = 'ebpf_collector' ++ + self.loop_all = False + + if disk_str == "default": +@@ -62,7 +70,7 @@ class CollectIo(): + logging.error("The file %s does not exist", stats_file) + return -1 + except Exception as e: +- logging.error("An error occurred3: %s", e) ++ logging.error("An error occurred: %s", e) + return -1 + + curr_value = lines.strip().split('\n') +@@ -193,33 +201,109 @@ class CollectIo(): + IO_GLOBAL_DATA[disk_name] = {} + + return len(IO_GLOBAL_DATA) != 0 +- +- def main_loop(self): +- logging.info("collect io thread start") ++ ++ def is_ebpf_avaliable(self): ++ with open('/proc/version', 'r') as f: ++ kernel_version = f.read().split()[2] ++ major_version = kernel_version.split('-')[0] ++ ++ base_path = '/sys/kernel/debug/block' ++ for disk_name in os.listdir(base_path): ++ if not self.loop_all and disk_name not in self.disk_list: ++ continue ++ self.disk_map_stage[disk_name] = EBPF_STAGE_LIST ++ self.window_value[disk_name] = {} ++ IO_GLOBAL_DATA[disk_name] = {} + +- if not self.is_kernel_avaliable() or len(self.disk_map_stage) == 0: +- logging.warning("no disks meet the requirements. 
collect io thread exit")
+-            return
+-
+         for disk_name, stage_list in self.disk_map_stage.items():
+             for stage in stage_list:
+-                self.window_value[disk_name][stage] = []
++                self.window_value[disk_name][stage] = {}
+                 IO_GLOBAL_DATA[disk_name][stage] = {}
+                 for category in Io_Category:
+                     IO_GLOBAL_DATA[disk_name][stage][category] = []
++                    self.window_value[disk_name][stage][category] = [[0,0,0], [0,0,0]]
+
+-        while True:
+-            start_time = time.time()
++        return major_version in EBPF_SUPPORT_VERSION and os.path.exists('/usr/bin/ebpf_collector') and len(IO_GLOBAL_DATA) != 0
++
++    def get_ebpf_raw_data(
++        self
++    ) -> None:
++        global EBPF_PROCESS
++        global EBPF_GLOBAL_DATA
+
++        while True:
+             if self.stop_event.is_set():
+                 logging.debug("collect io thread exit")
+                 return
++            line = EBPF_PROCESS.stdout.readline()
++            if not line:
++                logging.info("no ebpf data found, waiting for collector output")
++                break
++            EBPF_GLOBAL_DATA.append(line.strip())
++            time.sleep(0.1)
++
++    def update_ebpf_collector_data(
++        self,
++    ) -> None:
++        global EBPF_GLOBAL_DATA
+
++        while True:
++            if self.stop_event.is_set():
++                logging.debug("collect io thread exit")
++                return
++            if EBPF_GLOBAL_DATA:
++                for data in EBPF_GLOBAL_DATA:
++                    data_list = data.split()
++                    stage, finish_count, latency, io_dump, io_type, disk_name = data_list
++                    if disk_name not in self.window_value:
++                        continue
++                    io_type = self.get_ebpf_io_type(io_type)
++                    if not io_type:
++                        continue
++                    if len(self.window_value[disk_name][stage][io_type]) >= 2:
++                        self.window_value[disk_name][stage][io_type].pop()
++                    self.window_value[disk_name][stage][io_type].append([int(finish_count), int(latency), int(io_dump)])
++                EBPF_GLOBAL_DATA.clear()
++            time.sleep(0.1)
++
++    def get_ebpf_io_type(
++        self,
++        io_type: str
++    ) -> Union[str, None]:
++        io_type_mapping = {
++            "R": "read",
++            "W": "write",
++            "F": "flush",
++            "D": "discard"
++        }
++        io_type = io_type_mapping.get(io_type, None)
++        return io_type
++
++    def append_ebpf_period_data(
++        self,
++    ) -> None:
++        global IO_GLOBAL_DATA
++        while True:
++            if self.stop_event.is_set():
++                logging.debug("collect io thread exit")
++                return
++            start_time = time.time()
+             for disk_name, stage_list in self.disk_map_stage.items():
+-                if self.get_blk_io_hierarchy(disk_name, stage_list) < 0:
+-                    continue
+-                self.append_period_lat(disk_name, stage_list)
+-
++                for stage in stage_list:
++                    for io_type in Io_Category:
++                        if len(self.window_value[disk_name][stage][io_type]) < 2:
++                            continue
++                        if len(IO_GLOBAL_DATA[disk_name][stage][io_type]) >= self.max_save:
++                            IO_GLOBAL_DATA[disk_name][stage][io_type].pop()
++                        curr_finish_count, curr_latency, curr_io_dump_count = self.window_value[disk_name][stage][io_type][-1]
++                        prev_finish_count, prev_latency, prev_io_dump_count = self.window_value[disk_name][stage][io_type][-2]
++                        self.window_value[disk_name][stage][io_type].pop(0)
++                        self.window_value[disk_name][stage][io_type].insert(1, self.window_value[disk_name][stage][io_type][0])
++                        curr_lat = self.get_ebpf_latency_value(curr_latency=curr_latency, prev_latency=prev_latency, curr_finish_count=curr_finish_count, prev_finish_count=prev_finish_count)
++                        curr_iops = self.get_ebpf_iops(curr_finish_count=curr_finish_count, prev_finish_count=prev_finish_count)
++                        curr_io_length = self.get_ebpf_io_length(curr_latency=curr_latency, prev_latency=prev_latency)
++                        curr_io_dump = self.get_ebpf_io_dump(curr_io_dump_count=curr_io_dump_count, prev_io_dump_count=prev_io_dump_count)
++                        IO_GLOBAL_DATA[disk_name][stage][io_type].insert(0, [curr_lat, curr_iops, curr_io_length, curr_io_dump])
+
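++            # finish_count and latency are cumulative counters from the
++            # collector, so the metrics above are deltas between the two
++            # window samples. Hypothetical numbers: prev = [1000, 4.0e9, _]
++            # and curr = [1500, 6.5e9, _] (finish_count, latency in ns,
++            # io_dump) make get_ebpf_latency_value() return
++            # 2.5e9 / 500 / 1000 / 1000 = 5 (ms).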
elapsed_time = time.time() - start_time + sleep_time = self.period_time - elapsed_time + if sleep_time < 0: +@@ -231,6 +315,133 @@ class CollectIo(): + time.sleep(1) + sleep_time -= 1 + time.sleep(sleep_time) ++ ++ def get_ebpf_latency_value( ++ self, ++ curr_latency: int, ++ prev_latency: int, ++ curr_finish_count: int, ++ prev_finish_count: int ++ ) -> Union[int, float]: ++ finish = curr_finish_count - prev_finish_count ++ lat_time = curr_latency - prev_latency ++ if finish <= 0 or lat_time <= 0: ++ return 0 ++ value = lat_time / finish / 1000 / 1000 ++ if value.is_integer(): ++ return int(value) ++ else: ++ return round(value, 1) ++ ++ def get_ebpf_iops( ++ self, ++ curr_finish_count: int, ++ prev_finish_count: int ++ ) -> Union[int, float]: ++ finish = curr_finish_count - prev_finish_count ++ if finish <= 0: ++ return 0 ++ value = finish / self.period_time / 1000 / 1000 ++ if value.is_integer(): ++ return int(value) ++ else: ++ return round(value, 1) ++ ++ def get_ebpf_io_length( ++ self, ++ curr_latency: int, ++ prev_latency: int, ++ ) -> Union[int, float]: ++ lat_time = curr_latency - prev_latency ++ if lat_time <= 0: ++ return 0 ++ value = lat_time / self.period_time ++ if value.is_integer(): ++ return int(value) ++ else: ++ return round(value, 1) ++ ++ def get_ebpf_io_dump( ++ self, ++ curr_io_dump_count: int, ++ prev_io_dump_count: int ++ ) -> Union[int, float]: ++ io_dump_count = curr_io_dump_count - prev_io_dump_count ++ if io_dump_count <= 0: ++ return 0 ++ value = io_dump_count ++ return int(value) ++ ++ def start_ebpf_subprocess( ++ self ++ ) -> None: ++ global EBPF_PROCESS ++ EBPF_PROCESS = subprocess.Popen(self.ebpf_base_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) ++ ++ def stop_ebpf_subprocess( ++ self ++ ) -> None: ++ global EBPF_PROCESS ++ if EBPF_PROCESS: ++ EBPF_PROCESS.terminate() ++ EBPF_PROCESS.wait() ++ logging.info("ebpf collector thread exit") ++ ++ def main_loop(self): ++ global IO_GLOBAL_DATA ++ logging.info("collect io thread start") ++ ++ if self.is_kernel_avaliable() and len(self.disk_map_stage) != 0: ++ for disk_name, stage_list in self.disk_map_stage.items(): ++ for stage in stage_list: ++ self.window_value[disk_name][stage] = [] ++ IO_GLOBAL_DATA[disk_name][stage] = {} ++ for category in Io_Category: ++ IO_GLOBAL_DATA[disk_name][stage][category] = [] ++ ++ while True: ++ start_time = time.time() ++ ++ if self.stop_event.is_set(): ++ logging.debug("collect io thread exit") ++ return ++ ++ for disk_name, stage_list in self.disk_map_stage.items(): ++ if self.get_blk_io_hierarchy(disk_name, stage_list) < 0: ++ continue ++ self.append_period_lat(disk_name, stage_list) ++ ++ elapsed_time = time.time() - start_time ++ sleep_time = self.period_time - elapsed_time ++ if sleep_time < 0: ++ continue ++ while sleep_time > 1: ++ if self.stop_event.is_set(): ++ logging.debug("collect io thread exit") ++ return ++ time.sleep(1) ++ sleep_time -= 1 ++ time.sleep(sleep_time) ++ elif self.is_ebpf_avaliable(): ++ self.start_ebpf_subprocess() ++ ++ thread_get_data = threading.Thread(target=self.get_ebpf_raw_data) ++ thread_update_data = threading.Thread(target=self.update_ebpf_collector_data) ++ thread_append_data = threading.Thread(target=self.append_ebpf_period_data) ++ ++ thread_get_data.start() ++ thread_update_data.start() ++ thread_append_data.start() ++ ++ thread_get_data.join() ++ thread_update_data.join() ++ thread_append_data.join() ++ ++ self.stop_ebpf_subprocess() ++ logging.info("ebpf collector thread exits") ++ else: ++ logging.warning("fail 
to start ebpf collector thread. collect io thread exits") ++ return + + # set stop event, notify thread exit + def stop_thread(self): +diff --git a/src/python/sentryPlugins/avg_block_io/avg_block_io.py b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +index ac35be2..a83bd9b 100644 +--- a/src/python/sentryPlugins/avg_block_io/avg_block_io.py ++++ b/src/python/sentryPlugins/avg_block_io/avg_block_io.py +@@ -114,7 +114,7 @@ def read_config_lat_iodump(io_dic, config): + common_param = {} + lat_sec = None + if not config.has_section("latency"): +- logging.warning("Cannot find algorithm section in config file") ++ logging.warning("Cannot find latency section in config file") + else: + lat_sec = config["latency"] + +@@ -122,7 +122,7 @@ def read_config_lat_iodump(io_dic, config): + if not config.has_section("iodump"): + logging.warning("Cannot find iodump section in config file") + else: +- lat_sec = config["iodump"] ++ iodump_sec = config["iodump"] + + if not lat_sec and not iodump_sec: + return common_param +-- +2.33.0 \ No newline at end of file diff --git a/sysSentry.spec b/sysSentry.spec index 7f3e833..1024e38 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.2 -Release: 18 +Release: 19 License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -30,12 +30,15 @@ Patch17: optimize-the-handing-of-cat-cli-error-msg-in-cpu_sentry.patch Patch18: over-threshold-should-be-warn-level-log-in-cat-cli.patch Patch19: fix-bug-step-2-about-collect-module-and-avg-block-io.patch Patch20: add-log-level-and-change-log-format.patch +Patch21: add-ebpf-collector.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools BuildRequires: json-c-devel BuildRequires: chrpath +BuildRequires: elfutils-devel clang libbpf-devel llvm kernel-source kernel-devel Requires: libxalarm = %{version} +Requires: libbpf %description sysSentry provides framework tools for system inspection. 
@@ -100,6 +103,10 @@ make popd popd +pushd src/c/ebpf_collector +make +popd + %install # sysSentry mkdir -p %{buildroot}%{_bindir} @@ -111,6 +118,8 @@ install -d -m 700 %{buildroot}/etc/sysSentry/tasks/ install -d -m 700 %{buildroot}/etc/sysSentry/plugins/ install -m 600 config/inspect.conf %{buildroot}%{_sysconfdir}/sysSentry install -m 600 service/sysSentry.service %{buildroot}%{_unitdir} +install -m 755 src/c/ebpf_collector/ebpf_collector %{buildroot}%{_bindir} +install -m 755 src/c/ebpf_collector/output/ebpf_collector.bpf.o /usr/lib64 # xalarm sh build/build.sh -i %{buildroot}%{_libdir} @@ -187,6 +196,7 @@ rm -rf %{buildroot} %attr(0750,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/plugins %attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/inspect.conf %attr(0600,root,root) %{_unitdir}/sysSentry.service +%attr(0755,root,root) %{_bindir}/ebpf_collector # xalarm %attr(0550,root,root) %{_bindir}/xalarmd @@ -248,7 +258,13 @@ rm -rf %{buildroot} %attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_threshold_slow_io_detection %changelog -* Fri Sep 27 2024 zhuofeng - 1.0.2-18 +* Fri Sep 27 2024 zhangnan - 1.0.2-19 +- Type:requirement +- CVE:NA +- SUG:NA +- DESC:add ebpf collector + +* Fri Sep 27 2024 zhuofeng - 1.0.2-17 - Type:bugfix - CVE:NA - SUG:NA @@ -357,3 +373,4 @@ rm -rf %{buildroot} - CVE:NA - SUG:NA - DESC:Package init + -- Gitee From 6af4ef5434253e5e3d86ede96f7e8376ec4542f5 Mon Sep 17 00:00:00 2001 From: znzjugod Date: Mon, 30 Sep 2024 07:56:17 +0000 Subject: [PATCH 2/3] update add-ebpf-collector.patch. Signed-off-by: znzjugod --- add-ebpf-collector.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/add-ebpf-collector.patch b/add-ebpf-collector.patch index 189047a..595d184 100644 --- a/add-ebpf-collector.patch +++ b/add-ebpf-collector.patch @@ -2835,7 +2835,7 @@ index 0000000..88b9cb6 +#define WBT_RES (map_fd[5]) +#define TAG_MAP (map_fd[7]) +#define TAG_RES (map_fd[8]) -+#define BPF_FILE "/usr/lib64/ebpf_collector.bpf.o" ++#define BPF_FILE "/usr/lib/ebpf_collector.bpf.o" + +typedef struct { + int major; -- Gitee From 8aeb2e6a368635041abb56be4617148582854552 Mon Sep 17 00:00:00 2001 From: znzjugod Date: Mon, 30 Sep 2024 07:58:08 +0000 Subject: [PATCH 3/3] update sysSentry.spec. 
Signed-off-by: znzjugod --- sysSentry.spec | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sysSentry.spec b/sysSentry.spec index 1024e38..3077fc7 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.2 -Release: 19 +Release: 18 License: Mulan PSL v2 Group: System Environment/Daemons Source0: https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -119,7 +119,7 @@ install -d -m 700 %{buildroot}/etc/sysSentry/plugins/ install -m 600 config/inspect.conf %{buildroot}%{_sysconfdir}/sysSentry install -m 600 service/sysSentry.service %{buildroot}%{_unitdir} install -m 755 src/c/ebpf_collector/ebpf_collector %{buildroot}%{_bindir} -install -m 755 src/c/ebpf_collector/output/ebpf_collector.bpf.o /usr/lib64 +install -m 600 src/c/ebpf_collector/output/ebpf_collector.bpf.o %{buildroot}/usr/lib # xalarm sh build/build.sh -i %{buildroot}%{_libdir} @@ -197,6 +197,7 @@ rm -rf %{buildroot} %attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysSentry/inspect.conf %attr(0600,root,root) %{_unitdir}/sysSentry.service %attr(0755,root,root) %{_bindir}/ebpf_collector +%attr(0600,root,root) /usr/lib/ebpf_collector.bpf.o # xalarm %attr(0550,root,root) %{_bindir}/xalarmd @@ -258,7 +259,7 @@ rm -rf %{buildroot} %attr(0550,root,root) %{python3_sitelib}/sentryPlugins/ai_threshold_slow_io_detection %changelog -* Fri Sep 27 2024 zhangnan - 1.0.2-19 +* Fri Sep 27 2024 zhangnan - 1.0.2-18 - Type:requirement - CVE:NA - SUG:NA -- Gitee