diff --git a/source/tools/detect/cgtool/Makefile b/source/tools/detect/cgtool/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..15eeed40e22edfd3d76c07f0f3571f25faf67458
--- /dev/null
+++ b/source/tools/detect/cgtool/Makefile
@@ -0,0 +1,3 @@
+target := cgtool
+
+include $(SRC)/mk/sub.mk
diff --git a/source/tools/detect/cgtool/README.md b/source/tools/detect/cgtool/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4d6d0ec4bf8954e2f78e9725de5d3545beb45952
--- /dev/null
+++ b/source/tools/detect/cgtool/README.md
@@ -0,0 +1,127 @@
+1. cgtool工具介绍
+cgtool(Cgroup Tool)是Cgroup子系统流程跟踪和问题诊断工具集。主要包括:
+ memory_usage: 统计memcg子系统中进程内存使用量;
+ cpuacct_load:统计cpuacct子系统中cpu的aveload;
+
+2. 代码目录
+ cgrun: cgtool执行脚本
+ cpuacct_load:cpuacct_load工具目录
+ memory_usage:memory_usage工具目录
+ cgtool*.h:公共库文件
+
+3. 编译
+./configure --enable-libbpf --enable-target-cgtool
+make
+
+4. 工具使用
+
+4.1 注意事项
+* 当前工具只在4.19.91-007.ali4000.alios7.x86_64开发适配,其他内核版本暂不支持;
+
+4.2 cgtool
+4.2.1 命令说明
+sysak cgtool [options] [cgtool [cgtoolargs]]
+ options: -h, help information
+ -l, list all tools for cgroup
+ cgtool:
+ tool name for list
+ cgtoolargs:
+ args for the tool, -h get more
+
+4.2.2 举例
+#举例1:列出所支持的tools
+#sysak cgtool -l
+memory_usage # Tracing memory usage of the memory cgroup
+cpuacct_load # Tracing cpu load for the cpuacct cgroup
+
+4.3 memory_usage
+4.3.1 命令说明
+sysak cgtool memory_usage [OPTION...]
+ -b, --btf=BTF_PATH Specify path of the custom btf
+ -d, --debug Enable libbpf debug output
+ -f, --dir=dir cgroup dir
+ -t, --timeout=time time out
+ -?, --help Give this help list
+ --usage Give a short usage message
+ -V, --version Print program version
+
+4.3.2 举例
+#举例1:统计memcg目录1秒总体进程内存使用情况
+#sysak cgtool memory_usage -t 1
+task number:4 cgroup dir:/sys/fs/cgroup/memory/
+ PID TID COMM PGSIZE
+-----------------------------------
+29084 29084 systemd-cgroups 31
+29080 29080 systemd-cgroups 31
+29055 29055 systemd-cgroups 30
+1 1 systemd 144
+
+task number:2 cgroup dir:/sys/fs/cgroup/memory/yagent_script
+ PID TID COMM PGSIZE
+-----------------------------------
+29025 29025 sh 151
+29089 29089 hostinfo 71
+
+#举例2: 统计单一memcg组1秒进程内存使用情况
+#sysak cgtool memory_usage -t 1 -f /sys/fs/cgroup/memory/user.slice
+task number:5 cgroup dir:/sys/fs/cgroup/memory/user.slice
+ PID TID COMM PGSIZE
+-----------------------------------
+31085 31085 grep 49
+31087 31087 tail 28
+31084 31084 ps 121
+31083 31083 sh 85
+31086 31086 awk 48
+
+
+4.4 cpuacct_load
+4.4.1 命令说明
+sysak cgtool cpuacct_load [OPTION...]
+ -b, --btf=BTF_PATH Specify path of the custom btf
+ -d, --debug Enable libbpf debug output
+ -f, --dir=dir cgroup dir
+ -t, --timeout=time time out
+ -?, --help Give this help list
+ --usage Give a short usage message
+ -V, --version Print program version
+
+4.4.2 举例
+#举例1:统计cpuacct目录60秒负载
+#sysak cgtool cpuacct_load -t 60
+cgroup dir:/sys/fs/cgroup/cpuacct/h2o
+times:
+avenrun0: 126 115 105 96
+avenrun1: 31 30 29 28
+avenrun2: 8 7 6 5
+load0: 0.06 0.06 0.05 0.05
+load1: 0.02 0.01 0.01 0.01
+load2: 0.00 0.00 0.00 0.00
+
+cgroup dir:/sys/fs/cgroup/cpuacct/docker
+times:
+avenrun0: 0 0 0 0
+avenrun1: 37 36 35 34
+avenrun2: 24 23 22 21
+load0: 0.00 0.00 0.00 0.00
+load1: 0.02 0.02 0.02 0.02
+load2: 0.01 0.01 0.01 0.01
+
+cgroup dir:/sys/fs/cgroup/cpuacct/docker/26f45842eb4304617e0d121a384a7a0fcb7c25e0420771424102dde7f3886a28
+times:
+avenrun0: 0 0 0 0
+avenrun1: 37 36 35 34
+avenrun2: 24 23 22 21
+load0: 0.00 0.00 0.00 0.00
+load1: 0.02 0.02 0.02 0.02
+load2: 0.01 0.01 0.01 0.01
+
+#举例2: 统计单一cpuacct组60秒负载
+#sysak cgtool cpuacct_load -t 60 -f /sys/fs/cgroup/cpuacct/h2o
+cgroup dir:/sys/fs/cgroup/cpuacct/h2o
+times:
+avenrun0: 232 213 195 179 164 315
+avenrun1: 60 59 58 57 56 90
+avenrun2: 15 14 13 12 11 22
+load0: 0.11 0.10 0.10 0.09 0.08 0.15
+load1: 0.03 0.03 0.03 0.03 0.03 0.04
+load2: 0.01 0.01 0.01 0.01 0.01 0.01
diff --git a/source/tools/detect/cgtool/cgrun/Makefile b/source/tools/detect/cgtool/cgrun/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..0407c5a4fd8ab1692a3e0d0efcaf9d771495c9ba
--- /dev/null
+++ b/source/tools/detect/cgtool/cgrun/Makefile
@@ -0,0 +1,3 @@
+target = cgtool
+
+include $(SRC)/mk/sh.mk
diff --git a/source/tools/detect/cgtool/cgrun/cgtool.sh b/source/tools/detect/cgtool/cgrun/cgtool.sh
new file mode 100755
index 0000000000000000000000000000000000000000..197a99d115e0d585e9c39cd856eb9c0d380ae4bf
--- /dev/null
+++ b/source/tools/detect/cgtool/cgrun/cgtool.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#****************************************************************#
+# ScriptName: cgtool.sh
+# Author: Bixuan Cui
+# Create Date: 2022-01-07
+# Function: dispatch "sysak cgtool <tool> [args]" to the matching tool binary
+#***************************************************************#
+if [ "$SYSAK_WORK_PATH" != "" ]; then
+WORK_PATH=$SYSAK_WORK_PATH
+else
+WORK_PATH=/usr/local/sbin/.sysak_compoents
+fi
+
+MEMORYUSAGE_BIN=$WORK_PATH/tools/memory_usage
+CPUACCTLOAD_BIN=$WORK_PATH/tools/cpuacct_load
+
+# arguments
+usage()
+{
+	echo "cgtool: tools for analyzing cgroups"
+	echo "Usage:"
+	echo " sysak cgtool [options] [cgtool [cgtoolargs]]"
+	echo " options: -h, help information"
+	echo " -l, list all tools for cgroup"
+	echo " cgtool:"
+	echo " tool name for list"
+	echo " cgtoolargs:"
+	echo " args for the tool, -h get more"
+	echo "Examples:"
+	echo " sysak cgtool -l"
+	echo " sysak cgtool memory_usage -h"
+}
+
+trace_list()
+{
+	echo "memory_usage # Tracing memory usage of the memory cgroup"
+	echo "cpuacct_load # Tracing cpu load for the cpuacct cgroup"
+}
+
+while getopts 'hl' OPT; do
+	case $OPT in
+		"h")
+			usage
+			exit 0
+			;;
+		"l")
+			trace_list
+			exit 0
+			;;
+		*)
+			# unknown option: report the failure to the caller
+			usage
+			exit 1
+			;;
+	esac
+done
+
+# drop the parsed options; "$1" is the tool name, the rest are its arguments
+shift $((OPTIND - 1))
+cgtool=$1
+[ $# -gt 0 ] && shift
+
+if [ "X${cgtool}" == "Xmemory_usage" ]; then
+	"$MEMORYUSAGE_BIN" "$@"
+elif [ "X${cgtool}" == "Xcpuacct_load" ]; then
+	"$CPUACCTLOAD_BIN" "$@"
+else
+	echo "not support tool:${cgtool}"
+	exit 1
+fi
diff --git a/source/tools/detect/cgtool/cgtool_comm.h b/source/tools/detect/cgtool/cgtool_comm.h
new file mode 100644
index 0000000000000000000000000000000000000000..07af7325e5bfafc608c64a7594086449705cf9b1
--- /dev/null
+++ b/source/tools/detect/cgtool/cgtool_comm.h
@@ -0,0 +1,296 @@
+#ifndef __CGTRACE_COMM_H
+#define __CGTRACE_COMM_H
+
+typedef u32 uint32_t;
+typedef u64 uint64_t;
+typedef u16 uint16_t;
+typedef u8 uint8_t;
+
+struct mm_struct___MEMCG {
+	struct {
+		struct vm_area_struct *mmap;
+		struct rb_root mm_rb;
+		u64 vmacache_seqnum;
+		long unsigned int (*get_unmapped_area)(struct file *, long unsigned int, long unsigned int, long unsigned int, long unsigned int);
+		long unsigned int mmap_base;
+		long unsigned int mmap_legacy_base;
+		long unsigned int mmap_compat_base;
+		long unsigned int mmap_compat_legacy_base;
+		long unsigned int task_size;
+		long unsigned int highest_vm_end;
+		pgd_t *pgd;
+		atomic_t mm_users;
+		atomic_t mm_count;
+		atomic_long_t pgtables_bytes;
+		int map_count;
+		spinlock_t page_table_lock;
+		struct rw_semaphore mmap_sem;
+		struct list_head mmlist;
+		long unsigned int hiwater_rss;
+		long unsigned int hiwater_vm;
+		long unsigned int total_vm;
+		long unsigned int locked_vm;
+		long unsigned int pinned_vm;
+		long unsigned int data_vm;
+		long unsigned int exec_vm;
+		long unsigned int stack_vm;
+		long unsigned int def_flags;
+		spinlock_t arg_lock;
+		long unsigned int start_code;
+		long unsigned int end_code;
+		long unsigned int start_data;
+		long unsigned int end_data;
+		long unsigned int start_brk;
+		long unsigned int brk;
+		long unsigned int start_stack;
+		long unsigned int arg_start;
+		long unsigned int arg_end;
+		long unsigned int env_start;
+		long unsigned int env_end;
+		long unsigned int saved_auxv[46];
+		struct mm_rss_stat rss_stat;
+		struct linux_binfmt *binfmt;
+		mm_context_t context;
+		long unsigned int flags;
+		struct core_state *core_state;
+		atomic_t membarrier_state;
+		spinlock_t ioctx_lock;
+		struct kioctx_table *ioctx_table;
+		struct task_struct *owner;
+		struct user_namespace *user_ns;
+		struct file *exe_file;
+		struct mmu_notifier_mm *mmu_notifier_mm;
+		long unsigned int numa_next_scan;
+		long unsigned int numa_scan_offset;
+		int numa_scan_seq;
+		atomic_t tlb_flush_pending;
+		bool tlb_flush_batched;
+		struct uprobes_state uprobes_state;
+		atomic_long_t hugetlb_usage;
+		struct work_struct async_put_work;
+	};
+	long unsigned int ali_reserved1;
+	long unsigned int ali_reserved2;
+	long unsigned int cpu_bitmap[0];
+};
+
+struct page_counter {
+	atomic_long_t usage;
+	long unsigned int min;
+	long unsigned int low;
+	long unsigned int max;
+	struct page_counter *parent;
+	long unsigned int emin;
+	atomic_long_t min_usage;
+	atomic_long_t children_min_usage;
+	long unsigned int elow;
+	atomic_long_t low_usage;
+	atomic_long_t children_low_usage;
+	long unsigned int wmark_low;
+	long unsigned int wmark_high;
+	long unsigned int watermark;
+	long unsigned int failcnt;
+};
+
+struct vmpressure {
+	long unsigned int scanned;
+	long unsigned int reclaimed;
+	long unsigned int tree_scanned;
+	long unsigned int tree_reclaimed;
+	struct spinlock sr_lock;
+	struct list_head events;
+	struct mutex events_lock;
+	struct work_struct work;
+};
+
+struct mem_cgroup_thresholds {
+	struct mem_cgroup_threshold_ary *primary;
+	struct mem_cgroup_threshold_ary *spare;
+};
+
+struct memcg_padding {
+	char x[0];
+};
+
+enum memcg_kmem_state {
+	KMEM_NONE = 0,
+	KMEM_ALLOCATED = 1,
+	KMEM_ONLINE = 2,
+};
+
+struct mem_cgroup_id {
+	int id;
+	atomic_t ref;
+};
+
+struct deferred_split {
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	long unsigned int split_queue_len;
+};
+
+union kernfs_node_id {
+	struct {
+		u32 ino;
+		u32 generation;
+	};
+	u64 id;
+};
+
+struct mem_cgroup {
+	struct cgroup_subsys_state css;
+	struct mem_cgroup_id id;
+	struct page_counter memory;
+	struct page_counter swap;
+	struct page_counter memsw;
+	struct page_counter kmem;
+	struct page_counter tcpmem;
+	long unsigned int high;
+	struct work_struct high_work;
+	long unsigned int soft_limit;
+	struct vmpressure vmpressure;
+	bool use_hierarchy;
+	bool oom_group;
+	bool oom_lock;
+	int under_oom;
+	int swappiness;
+	int oom_kill_disable;
+	struct cgroup_file events_file;
+	struct cgroup_file swap_events_file;
+	struct mutex thresholds_lock;
+	struct mem_cgroup_thresholds thresholds;
+	struct mem_cgroup_thresholds memsw_thresholds;
+	struct list_head oom_notify;
+	long unsigned int move_charge_at_immigrate;
+	spinlock_t move_lock;
+	long unsigned int move_lock_flags;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	struct memcg_padding _pad1_;
+	atomic_t moving_account;
+	struct task_struct *move_lock_task;
+	struct mem_cgroup_stat_cpu *stat_cpu;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	struct memcg_padding _pad2_;
+	atomic_long_t stat[35];
+	atomic_long_t events[84];
+	atomic_long_t memory_events[7];
+	long unsigned int socket_pressure;
+	bool tcpmem_active;
+	int tcpmem_pressure;
+	unsigned int wmark_ratio;
+	struct work_struct wmark_work;
+	unsigned int wmark_scale_factor;
+	int kmemcg_id;
+	enum memcg_kmem_state kmem_state;
+	struct list_head kmem_caches;
+	int last_scanned_node;
+	nodemask_t scan_nodes;
+	atomic_t numainfo_events;
+	atomic_t numainfo_updating;
+	struct list_head cgwb_list;
+	struct wb_domain cgwb_domain;
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+	struct deferred_split deferred_split_queue;
+	long unsigned int ali_reserved1;
+	long unsigned int ali_reserved2;
+	long unsigned int ali_reserved3;
+	long unsigned int ali_reserved4;
+	struct mem_cgroup_per_node *nodeinfo[0];
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+};
+
+struct cgroup___MEMCG {
+	struct cgroup_subsys_state self;
+	long unsigned int flags;
+	int id;
+	int level;
+	int max_depth;
+	int nr_descendants;
+	int nr_dying_descendants;
+	int max_descendants;
+	int nr_populated_csets;
+	int nr_populated_domain_children;
+	int nr_populated_threaded_children;
+	int nr_threaded_children;
+	struct kernfs_node *kn;
+	struct cgroup_file procs_file;
+	struct cgroup_file events_file;
+	u16 subtree_control;
+	u16 subtree_ss_mask;
+	u16 old_subtree_control;
+	u16 old_subtree_ss_mask;
+	struct cgroup_subsys_state *subsys[13];
+	struct cgroup_root *root;
+	struct list_head cset_links;
+	struct list_head e_csets[13];
+	struct cgroup *dom_cgrp;
+	struct cgroup *old_dom_cgrp;
+	struct cgroup_rstat_cpu *rstat_cpu;
+	struct list_head rstat_css_list;
+	struct cgroup_base_stat pending_bstat;
+	struct cgroup_base_stat bstat;
+	struct prev_cputime prev_cputime;
+	struct list_head pidlists;
+	struct mutex pidlist_mutex;
+	wait_queue_head_t offline_waitq;
+	struct work_struct release_agent_work;
+	struct psi_group psi;
+	struct cgroup_bpf bpf;
+	atomic_t congestion_count;
+	int ancestor_ids[0];
+};
+
+struct kernfs_node___419 {
+	atomic_t count;
+	atomic_t active;
+	struct kernfs_node *parent;
+	const char *name;
+	struct rb_node rb;
+	const void *ns;
+	unsigned int hash;
+	union {
+		struct kernfs_elem_dir dir;
+		struct kernfs_elem_symlink symlink;
+		struct kernfs_elem_attr attr;
+	};
+	void *priv;
+	union kernfs_node_id id;
+	short unsigned int flags;
+	umode_t mode;
+	struct kernfs_iattrs *iattr;
+};
+
+struct cpuacct___AVE {
+	struct cgroup_subsys_state css;
+	struct cpuacct_usage *cpuusage;
+	struct cpuacct_alistats *alistats;
+	struct kernel_cpustat *cpustat;
+	long unsigned int avenrun[3];
+	long unsigned int avenrun_r[3];
+	struct list_head sli_list;
+	bool sli_enabled;
+	struct sched_cgroup_lat_stat_cpu *lat_stat_cpu;
+	u64 next_load_update;
+	long unsigned int ali_reserved1;
+	long unsigned int ali_reserved2;
+	long unsigned int ali_reserved3;
+	long unsigned int ali_reserved4;
+};
+
+#endif
diff --git a/source/tools/detect/cgtool/cgtoollib.h b/source/tools/detect/cgtool/cgtoollib.h
new file mode 100644
index 0000000000000000000000000000000000000000..96e19e9b766f7eca3939c186358b0bb6d3b2acea
--- /dev/null
+++ b/source/tools/detect/cgtool/cgtoollib.h
@@ -0,0 +1,77 @@
+#ifndef __CGTRACELIB_H
+#define __CGTRACELIB_H
+
+#include
+#include
+#include
+#include
+
+#define CMD_LEN 4096
+
+/*
+ * Resolve a kernfs inode number to a cgroup directory below
+ * /sys/fs/cgroup/<sub>/ by running find(1) through popen().
+ *
+ * On success the first line printed by find (trailing newline included)
+ * is stored in buf and 0 is returned.  Returns -1, with buf set to an
+ * empty string where possible, when an argument is unusable, the command
+ * does not fit in CMD_LEN, popen() fails or find prints nothing.
+ */
+static int get_dir_by_knid(unsigned int kn_id, char *sub, char *buf, unsigned int size)
+{
+	FILE *fp = NULL;
+	char cmd[CMD_LEN];
+	int len;
+	int ret = 0;
+
+	if (sub == NULL || buf == NULL || size == 0)
+		return -1;
+	buf[0] = '\0';
+
+	len = snprintf(cmd, sizeof(cmd), "find /sys/fs/cgroup/%s/ -inum %u", sub, kn_id);
+	if (len < 0 || (unsigned int)len >= sizeof(cmd))
+		return -1;
+
+	fp = popen(cmd, "r");
+	if (fp == NULL)
+		return -1;
+
+	/* fgets() leaves buf untouched when find prints nothing */
+	if (fgets(buf, size, fp) == NULL)
+		ret = -1;
+
+	pclose(fp);
+
+	return ret;
+}
+
+/*
+ * Return the inode number of dir, or 0 when dir is NULL or stat() fails.
+ * 0 is used for the failure case because the callers in this directory
+ * treat (unsigned int)-1 as "no directory filter".
+ */
+static unsigned int get_knid_by_dir(char *dir)
+{
+	struct stat buf;
+
+	if (dir == NULL || stat(dir, &buf) != 0)
+		return 0;
+
+	return buf.st_ino;
+}
+
+/* Raise RLIMIT_MEMLOCK to RLIM_INFINITY; prints an error and exits on failure */
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+#endif
diff --git a/source/tools/detect/cgtool/cgtoollib_bpf.h b/source/tools/detect/cgtool/cgtoollib_bpf.h
new file mode 100644
index 0000000000000000000000000000000000000000..80e553b1e365026f9f9d30241a1eb8065409294c
--- /dev/null
+++ b/source/tools/detect/cgtool/cgtoollib_bpf.h
@@ -0,0 +1,35 @@
+#ifndef __CGTRACELIB_BPF_H
+#define __CGTRACELIB_BPF_H
+
+#include
+#include
+#include
+#include
+#include
+
+#include "cgtool_comm.h"
+
+#define BPF_ANY 0
+#define NULL ((void*)0)
+
+/*
+ * Return the kernfs inode number of the directory backing cgrp, or 0
+ * when a kernel read fails.  User space looks the directory up with
+ * "find -inum" and compares against st_ino, so only the 32-bit ino half
+ * of kernfs_node_id is returned.
+ */
+static u64 get_knid_by_cgroup(struct cgroup___MEMCG *cgrp)
+{
+	struct kernfs_node___419 *node;
+	union kernfs_node_id id;
+
+	if (bpf_core_read(&node, sizeof(struct kernfs_node___419 *), &cgrp->kn))
+		return 0;
+	if (bpf_core_read(&id, sizeof(union kernfs_node_id), &node->id))
+		return 0;
+
+	/* id already lives on the BPF stack, no further probe read is needed */
+	return id.ino;
+}
+
+#endif
diff --git a/source/tools/detect/cgtool/cpuacct_load/Makefile b/source/tools/detect/cgtool/cpuacct_load/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..1f2ff3d4375bad7bed26fbd83abf03e1f60f5b67
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = cpuacct_load
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..84c460e09872509476858647eb87b4e1552c3cad
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c
@@ -0,0 +1,69 @@
+#include "../cgtoollib_bpf.h"
+#include "../cgtool_comm.h"
+#include "cpuacct_load.h"
+
+struct bpf_map_def SEC("maps") cpuacct_load_hash_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct cpuacct_load),
+	.max_entries = 256,
+};
+
+SEC("kprobe/cpuacct_calc_load")
+int kprobe_cpuacct_calc_load(struct pt_regs *ctx)
+{
+	struct cpuacct___AVE *acct = (struct cpuacct___AVE*)PT_REGS_PARM1(ctx);
+	int key=0;
+	struct cpuacct_load load = {0};
+	struct cpuacct_load *load_up;
+	struct cgroup___MEMCG *cgrp;
+	struct cgroup_subsys_state *css;
+	unsigned int index = 0;
+	unsigned int avenrun_n = 0;
+
+	if (bpf_core_read(&cgrp, sizeof(struct cgroup___MEMCG *), &acct->css.cgroup))
+		return 0;
+	if (bpf_core_read(&key, sizeof(int), &cgrp->id))
+		return 0;
+
+	load_up = bpf_map_lookup_elem(&cpuacct_load_hash_map, &key);
+	if (load_up != NULL) {
+		// update the avenrun
+		index = load_up->avenrun_index;
+		avenrun_n = load_up->avenrun_n;
+
+		if (index >= AVENRUN_MAX)
+			return 0;
+
+		if (bpf_core_read(&load_up->run[index][0], sizeof(unsigned long), &acct->avenrun[0]))
+			return 0;
+		if (bpf_core_read(&load_up->run[index][1], sizeof(unsigned long), &acct->avenrun[1]))
+			return 0;
+		if (bpf_core_read(&load_up->run[index][2], sizeof(unsigned long), &acct->avenrun[2]))
+			return 0;
+
+		load_up->avenrun_index = (index + 1) % AVENRUN_MAX;
+		load_up->avenrun_n = avenrun_n + 1 > AVENRUN_MAX ? AVENRUN_MAX : avenrun_n + 1;
+	} else {
+		// add new load
+		if (bpf_core_read(&load.run[index][0], sizeof(unsigned long), &acct->avenrun[0]))
+			return 0;
+		if (bpf_core_read(&load.run[index][1], sizeof(unsigned long), &acct->avenrun[1]))
+			return 0;
+		if (bpf_core_read(&load.run[index][2], sizeof(unsigned long), &acct->avenrun[2]))
+			return 0;
+
+		load.avenrun_index = (index + 1) % AVENRUN_MAX;
+		load.avenrun_n = avenrun_n + 1 > AVENRUN_MAX ? AVENRUN_MAX : avenrun_n + 1;
+
+		load.knid = get_knid_by_cgroup(cgrp);
+		if (load.knid == 0)
+			return 0;
+
+		bpf_map_update_elem(&cpuacct_load_hash_map, &key, &load, BPF_ANY);
+	}
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c
new file mode 100644
index 0000000000000000000000000000000000000000..03458b56cc52fb5b3e20ad0cd5251c9d0431dbff
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c
@@ -0,0 +1,232 @@
+#include
+#include
+#include
+#include
+#include
+#include "cpuacct_load.h"
+#include "cpuacct_load.skel.h"
+#include "../cgtoollib.h"
+
+const char *argp_program_version = "cgtrace cpuacct_load 1.0";
+static const char argp_program_doc[] =
+	"\n Tracing cpu load for the cpuacct cgroup\n"
+	;
+
+static const struct argp_option cpuacct_load_options[] = {
+	{"timeout", 't', "timeout", 0, "time out"},
+	{"dir", 'f', "dir", 0, "cgroup dir"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+static struct env
+{
+	int timeout;
+	char *dir;
+	bool debug;
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct cpuacct_load_bpf *obj;
+/* set by the signal handler once SIGINT or SIGALRM has been received */
+static volatile sig_atomic_t stop_requested;
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+		const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+
+	switch (key)
+	{
+	case 't':
+		env.timeout = atoi(arg);
+		break;
+	case 'f':
+		env.dir = arg;
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/*
+ * Print and then remove every record of the load map.  knid selects one
+ * cgroup by kernfs inode number; (unsigned int)-1 prints all of them.
+ */
+static void print_cpuacct_load(struct cpuacct_load_bpf *obj, unsigned int knid)
+{
+	int fd = bpf_map__fd(obj->maps.cpuacct_load_hash_map);
+	int j, k, loop;
+	/* the map is declared with key_size = sizeof(int) */
+	int key, next_key;
+	int *prev_key = NULL;
+	struct cpuacct_load load;
+	bool find = false;
+	char dir[PATH_MAX];
+
+	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0)
+	{
+		if ((bpf_map_lookup_elem(fd, &next_key, &load)) != 0) {
+			goto next;
+		}
+
+		if (load.avenrun_n >= AVENRUN_MAX)
+			k = load.avenrun_index;
+		else
+			k = 0;
+
+		if (knid == (unsigned int)-1 || load.knid == knid)
+			find = true;
+		else
+			goto next;
+
+		if (get_dir_by_knid(load.knid, "cpuacct", dir, sizeof(dir)) < 0) {
+			printf("can't get cgroup dir by knid:%u\n", load.knid);
+			/* still drop the record, otherwise the walk never advances */
+			goto next;
+		}
+
+		printf("cgroup dir:%s", dir);
+		printf("times:\n");
+
+		/* print avenrun */
+		for (loop = 0; loop < 3; loop++) {
+			printf("avenrun%d:", loop);
+			j = k;
+			for (int i = 0; i < load.avenrun_n; i++) {
+				printf(" %lu", load.run[j][loop]);
+				j = (j + 1) % AVENRUN_MAX;
+			}
+			printf("\n");
+		}
+
+		/* print load */
+		for (loop = 0; loop < 3; loop++) {
+			printf("load%d:", loop);
+			j = k;
+			for (int i = 0; i < load.avenrun_n; i++) {
+				printf(" %lu.%02lu", cal_load_int(load.run[j][loop]), cal_load_frac(load.run[j][loop]));
+				j = (j + 1) % AVENRUN_MAX;
+			}
+			printf("\n");
+		}
+		printf("\n");
+next:
+		/*
+		 * After a successful delete the walk restarts from the first
+		 * remaining key; only step past a key that could not be removed.
+		 */
+		if (bpf_map_delete_elem(fd, &next_key) != 0) {
+			key = next_key;
+			prev_key = &key;
+		}
+	}
+
+	if (find == false) {
+		printf("can't get load trace, maybe should do:\n");
+		printf("1) echo 1 > /proc/async_load_calc\n");
+		printf("2) echo 1 > cpuacct.enable_sli\n");
+		printf("3) check whether the [dir] parameter is correct\n");
+	}
+}
+
+/* Signal handler: only record the request, the report is printed by main() */
+static void alarm_stop(int signo)
+{
+	(void)signo;
+	stop_requested = 1;
+}
+
+/*
+ * Block until SIGINT is received or, when timeout > 0, until SIGALRM
+ * fires after timeout seconds.  Both signals stay blocked outside of
+ * sigsuspend() so that one arriving early cannot be missed.
+ */
+static void wait_for_stop(int timeout)
+{
+	sigset_t block, old;
+
+	sigemptyset(&block);
+	sigaddset(&block, SIGINT);
+	sigaddset(&block, SIGALRM);
+	sigprocmask(SIG_BLOCK, &block, &old);
+
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+	if (timeout > 0)
+		alarm(timeout);
+
+	while (!stop_requested)
+		sigsuspend(&old);
+
+	sigprocmask(SIG_SETMASK, &old, NULL);
+}
+
+int main(int argc, char **argv)
+{
+	int err;
+	unsigned int knid = -1;
+	static const struct argp argp = {
+		.options = cpuacct_load_options,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+		.args_doc = NULL,
+	};
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	obj = cpuacct_load_bpf__open();
+	if (!obj)
+	{
+		printf("failed to open BPF object\n");
+		return 1;
+	}
+	err = cpuacct_load_bpf__load(obj);
+	if (err)
+	{
+		printf("failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = cpuacct_load_bpf__attach(obj);
+	if (err)
+	{
+		printf("failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	/* without -t (or with a non-positive value) trace until Ctrl-C */
+	wait_for_stop(env.timeout);
+
+	if (env.dir != NULL)
+		knid = get_knid_by_dir(env.dir);
+	print_cpuacct_load(obj, knid);
+
+cleanup:
+	cpuacct_load_bpf__destroy(obj);
+	return err != 0;
+}
diff --git a/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h
new file mode 100644
index 0000000000000000000000000000000000000000..b871c3c872cf893eb8dd517bcc71deb76d72f090
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h
@@ -0,0 +1,32 @@
+#ifndef __CGTOOL_CPUACCT_LOAD_H
+#define __CGTOOL_CPUACCT_LOAD_H
+
+#define AVENRUN_MAX 10
+
+struct cpuacct_load {
+	unsigned long run[AVENRUN_MAX][3];
+	unsigned int avenrun_index;
+	unsigned int avenrun_n;
+	unsigned int knid;
+};
+
+/* cal load from kernel */
+#define FSHIFT 11 /* nr of bits of precision */
+#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+/* Integer part of an FSHIFT fixed-point sample after adding FIXED_1/200 */
+static unsigned long cal_load_int(unsigned long avenrun)
+{
+	unsigned long load_tmp = (avenrun + FIXED_1/200) << 0;
+	return LOAD_INT(load_tmp);
+}
+
+/* Two-digit fractional part of the same sample, in hundredths */
+static unsigned long cal_load_frac(unsigned long avenrun)
+{
+	unsigned long load_tmp = (avenrun + FIXED_1/200) << 0;
+	return LOAD_FRAC(load_tmp);
+}
+#endif
diff --git a/source/tools/detect/cgtool/memory_usage/Makefile b/source/tools/detect/cgtool/memory_usage/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..5c73f073fc8482b5dc3db98c684249b3b087a3d3
--- /dev/null
+++ b/source/tools/detect/cgtool/memory_usage/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = memory_usage
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/cgtool/memory_usage/memory_usage.bpf.c b/source/tools/detect/cgtool/memory_usage/memory_usage.bpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6b65a0f2cb95c88ac9f53c72fbeabb5f9d15283
--- /dev/null
+++ b/source/tools/detect/cgtool/memory_usage/memory_usage.bpf.c
@@ -0,0 +1,62 @@
+#include "../cgtoollib_bpf.h"
+#include "../cgtool_comm.h"
+#include "memory_usage.h"
+
+struct bpf_map_def SEC("maps") usage_hash_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(unsigned long),
+	.value_size = sizeof(struct memcg_usage),
+	.max_entries = 256,
+};
+
+SEC("kprobe/mem_cgroup_try_charge")
+int kprobe_mem_cgroup_try_charge(struct pt_regs *ctx)
+{
+	struct mm_struct___MEMCG *mm = (struct mm_struct___MEMCG *)PT_REGS_PARM2(ctx);
+	unsigned long pid_tgid = bpf_get_current_pid_tgid();
+	struct memcg_usage usage = {0};
+	struct memcg_usage *usage_up;
+	struct task_struct *tk;
+
+	if (bpf_core_read(&tk, sizeof(struct task_struct *), &mm->owner))
+		return 0;
+
+	usage_up = bpf_map_lookup_elem(&usage_hash_map, &pid_tgid);
+	if (usage_up == NULL) {
+		usage.ptid = pid_tgid;
+		if (bpf_core_read(&usage.comm, sizeof(usage.comm), &tk->comm))
+			return 0;
+
+		bpf_map_update_elem(&usage_hash_map, &pid_tgid, &usage, BPF_ANY);
+	} else {
+		if (bpf_core_read(&usage_up->comm, sizeof(usage_up->comm), &tk->comm))
+			return 0;
+	}
+
+	return 0;
+}
+
+SEC("kprobe/try_charge")
+int kprobe_try_charge(struct pt_regs *ctx)
+{
+	struct mem_cgroup *memcg = (struct mem_cgroup *)PT_REGS_PARM1(ctx);
+	unsigned long pid_tgid = bpf_get_current_pid_tgid();
+	struct memcg_usage *usage_up;
+	struct cgroup___MEMCG *cgrp;
+
+	usage_up = bpf_map_lookup_elem(&usage_hash_map, &pid_tgid);
+	if (usage_up != NULL) {
+		/* read only the cgroup pointer instead of copying the whole css */
+		if (bpf_core_read(&cgrp, sizeof(struct cgroup___MEMCG *), &memcg->css.cgroup))
+			return 0;
+		usage_up->knid = get_knid_by_cgroup(cgrp);
+		if (usage_up->knid == 0)
+			return 0;
+
+		usage_up->pgsize += (unsigned int)PT_REGS_PARM3(ctx);
+	}
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/cgtool/memory_usage/memory_usage.c b/source/tools/detect/cgtool/memory_usage/memory_usage.c
new file mode 100644
index 0000000000000000000000000000000000000000..70cbf83ca259a271e5b356d306cebd39fd2ec6e4
--- /dev/null
+++ b/source/tools/detect/cgtool/memory_usage/memory_usage.c
@@ -0,0 +1,364 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "memory_usage.h"
+#include "memory_usage.skel.h"
+#include "../cgtoollib.h"
+
+const char *argp_program_version = "cgtrace usage 1.0";
+
+static const char argp_program_doc[] =
+	"\n Tracing memory usage for the memory cgroup\n"
+	;
+
+static const struct argp_option usage_options[] = {
+	{"timeout", 't', "time", 0, "time out"},
+	{"dir", 'f', "dir", 0, "cgroup dir"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+static struct env
+{
+	int timeout;
+	char *dir;
+	bool debug;
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct memory_usage_bpf *obj = NULL;
+static struct memcg_mess* mess = NULL;
+/* set by the signal handler once SIGINT or SIGALRM has been received */
+static volatile sig_atomic_t stop_requested;
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+		const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+
+	switch (key)
+	{
+	case 't':
+		env.timeout = atoi(arg);
+		break;
+	case 'f':
+		env.dir = arg;
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/* Append a task record built from usage to the end of m's task list */
+static int add_taskinfo(struct memcg_mess *m, struct memcg_usage usage)
+{
+	struct task_info *info_tmp = m->info;
+	struct task_info *p = (struct task_info *)malloc(sizeof(struct task_info));
+	if (p == NULL) {
+		printf("malloc task info failed\n");
+		return -ENOMEM;
+	}
+
+	p->pid = usage.ptid >> 32;
+	p->tid = (unsigned int)usage.ptid;
+	strncpy(p->comm, usage.comm, sizeof(p->comm) - 1);
+	p->comm[sizeof(p->comm) - 1] = '\0';
+	p->pgsize = usage.pgsize;
+	p->next = NULL;
+
+	if (info_tmp == NULL) {
+		m->info = p;
+	}
+	else {
+		while (info_tmp->next != NULL)
+			info_tmp = info_tmp->next;
+
+		info_tmp->next = p;
+	}
+
+	m->task_num++;
+
+	return 0;
+}
+
+/* Find the list node whose knid equals key; NULL when there is none */
+static struct memcg_mess* mess_list_find(int key)
+{
+	struct memcg_mess *mess_tmp = mess;
+
+	while (mess_tmp != NULL) {
+		if (mess_tmp->knid == key)
+			return mess_tmp;
+
+		mess_tmp = mess_tmp->next;
+	}
+
+	return NULL;
+}
+
+/* Append a new cgroup node for usage.knid holding usage as its first task */
+static int mess_list_insert(struct memcg_usage usage)
+{
+	int ret = 0;
+	struct memcg_mess *mess_tmp;
+	struct memcg_mess *m;
+
+	m = (struct memcg_mess *)malloc(sizeof(struct memcg_mess));
+	if (m == NULL) {
+		printf("malloc mess failed\n");
+		return -ENOMEM;
+	}
+
+	m->task_num = 0;
+	m->info = NULL;
+	m->knid = usage.knid;
+	m->next = NULL;
+	ret = add_taskinfo(m, usage);
+	if (ret < 0)
+		goto out_free;
+
+	if (mess == NULL)
+		mess = m;
+	else {
+		mess_tmp = mess;
+		while (mess_tmp->next != NULL)
+			mess_tmp = mess_tmp->next;
+
+		mess_tmp->next = m;
+	}
+
+	return ret;
+
+out_free:
+	free(m);
+	return ret;
+}
+
+/* Release every cgroup node and its task records, leaving the list empty */
+void mess_list_free()
+{
+	struct memcg_mess *mess_tmp;
+	struct task_info *info_tmp;
+
+	while (mess != NULL) {
+		/* free task info of mess */
+		while (mess->info != NULL) {
+			info_tmp = mess->info;
+			mess->info = mess->info->next;
+			free(info_tmp);
+		}
+
+		/* free mess */
+		mess_tmp = mess;
+		mess = mess->next;
+		free(mess_tmp);
+	}
+}
+
+/*
+ * Copy every record of the BPF map into the mess list, grouped by cgroup.
+ * Returns 0 on success or a negative errno when an allocation fails.
+ */
+static int usage_restore()
+{
+	int fd = bpf_map__fd(obj->maps.usage_hash_map);
+	unsigned long key, next_key;
+	unsigned long *prev_key = NULL;
+	struct memcg_usage usage;
+	struct memcg_mess *m;
+	int ret = 0;
+
+	// calculate the number of tasks for each memcg
+	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
+		/* a NULL prev_key asks for the first key; afterwards walk from key */
+		key = next_key;
+		prev_key = &key;
+
+		if (bpf_map_lookup_elem(fd, &next_key, &usage) != 0)
+			continue;
+
+		m = mess_list_find(usage.knid);
+		if (m == NULL) {
+			ret = mess_list_insert(usage);
+			if (ret < 0)
+				return ret;
+		}
+		else {
+			ret = add_taskinfo(m, usage);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+/* Remove every record from the BPF map */
+static void free_map(void)
+{
+	int fd = bpf_map__fd(obj->maps.usage_hash_map);
+	unsigned long key, next_key;
+	unsigned long *prev_key = NULL;
+
+	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
+		/* only step past a key that could not be removed */
+		if (bpf_map_delete_elem(fd, &next_key) != 0) {
+			key = next_key;
+			prev_key = &key;
+		}
+	}
+}
+
+/*
+ * Print the collected tasks per cgroup.  knid selects one cgroup by
+ * kernfs inode number; (unsigned int)-1 prints all of them.
+ */
+static void usage_show(unsigned int knid)
+{
+	struct memcg_mess *mess_tmp = mess;
+	struct task_info *info_tmp;
+	char dir[PATH_MAX];
+	bool find = false;
+
+	while (mess_tmp != NULL) {
+		if (knid == (unsigned int)-1 || mess_tmp->knid == knid)
+			find = true;
+		else
+			goto next;
+
+		if (get_dir_by_knid(mess_tmp->knid, "memory", dir, sizeof(dir)) < 0) {
+			printf("can't get cgroup dir by knid:%u\n", mess_tmp->knid);
+			goto next;
+		}
+
+		printf("task number:%d cgroup dir:%s", mess_tmp->task_num, dir);
+		printf(" PID TID COMM PGSIZE\n");
+		printf("-----------------------------------\n");
+
+		/* list task info of mess */
+		info_tmp = mess_tmp->info;
+		while (info_tmp != NULL) {
+			printf("%-6u %-6u %-16s %u\n", info_tmp->pid, info_tmp->tid, info_tmp->comm, info_tmp->pgsize);
+			info_tmp = info_tmp->next;
+		}
+		printf("\n");
+next:
+		mess_tmp = mess_tmp->next;
+	}
+
+	if (find == false) {
+		/* env.dir is NULL when -f was not given; never hand NULL to %s */
+		printf("can't get memory usage in %s. ", env.dir != NULL ? env.dir : "/sys/fs/cgroup/memory");
+		printf("check whether the [dir] parameter is correct.\n");
+	}
+}
+
+/* Signal handler: only record the request, the report is printed by main() */
+static void alarm_stop(int signo)
+{
+	(void)signo;
+	stop_requested = 1;
+}
+
+/*
+ * Block until SIGINT is received or, when timeout > 0, until SIGALRM
+ * fires after timeout seconds.  Both signals stay blocked outside of
+ * sigsuspend() so that one arriving early cannot be missed.
+ */
+static void wait_for_stop(int timeout)
+{
+	sigset_t block, old;
+
+	sigemptyset(&block);
+	sigaddset(&block, SIGINT);
+	sigaddset(&block, SIGALRM);
+	sigprocmask(SIG_BLOCK, &block, &old);
+
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+	if (timeout > 0)
+		alarm(timeout);
+
+	while (!stop_requested)
+		sigsuspend(&old);
+
+	sigprocmask(SIG_SETMASK, &old, NULL);
+}
+
+int main(int argc, char **argv)
+{
+	int err;
+	unsigned int knid = -1;
+	static const struct argp argp = {
+		.options = usage_options,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+		.args_doc = NULL,
+	};
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	obj = memory_usage_bpf__open();
+	if (!obj)
+	{
+		printf("failed to open BPF object\n");
+		return 1;
+	}
+	err = memory_usage_bpf__load(obj);
+	if (err)
+	{
+		printf("failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = memory_usage_bpf__attach(obj);
+	if (err)
+	{
+		printf("failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	/* without -t (or with a non-positive value) trace until Ctrl-C */
+	wait_for_stop(env.timeout);
+
+	if (env.dir != NULL)
+		knid = get_knid_by_dir(env.dir);
+
+	if (usage_restore() == 0)
+		usage_show(knid);
+
+	free_map();
+	mess_list_free();
+
+cleanup:
+	memory_usage_bpf__destroy(obj);
+	return err != 0;
+}
diff --git a/source/tools/detect/cgtool/memory_usage/memory_usage.h b/source/tools/detect/cgtool/memory_usage/memory_usage.h
new file mode 100644
index 0000000000000000000000000000000000000000..da1c74a6485e5caf90692f6327d2f0f6a4148651
--- /dev/null
+++ b/source/tools/detect/cgtool/memory_usage/memory_usage.h
@@ -0,0 +1,28 @@
+#ifndef __CGTOOL_MEMORY_USAGE_H
+#define __CGTOOL_MEMORY_USAGE_H
+
+#define TASK_COMM_LEN 16
+
+struct memcg_usage {
+	char comm[TASK_COMM_LEN];
+	unsigned int pgsize;
+	int knid;
+	unsigned long ptid;
+};
+
+struct task_info {
+	unsigned int pid;
+	unsigned int tid;
+	char comm[TASK_COMM_LEN];
+	unsigned int pgsize;
+	struct task_info *next;
+};
+
+struct memcg_mess {
+	int knid;
+	struct task_info *info;
+	unsigned int task_num;
+	struct memcg_mess *next;
+};
+
+#endif