From 04d9a127a1a9831348f597cf9c1808572df43d38 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Sat, 22 Jan 2022 14:16:12 +0800 Subject: [PATCH] Add cgtool for tracing cgroup subsystem Signed-off-by: Bixuan Cui --- source/tools/detect/cgtool/Makefile | 3 + source/tools/detect/cgtool/README.md | 179 ++++++++++ source/tools/detect/cgtool/cgcheck/Makefile | 5 + source/tools/detect/cgtool/cgcheck/README.md | 19 ++ source/tools/detect/cgtool/cgcheck/cgcheck.py | 131 +++++++ source/tools/detect/cgtool/cgrun/Makefile | 3 + source/tools/detect/cgtool/cgrun/cgtool.sh | 75 ++++ source/tools/detect/cgtool/cgtool_comm.h | 296 ++++++++++++++++ source/tools/detect/cgtool/cgtoollib.h | 73 ++++ source/tools/detect/cgtool/cgtoollib_bpf.h | 31 ++ .../tools/detect/cgtool/cpuacct_load/Makefile | 9 + .../cgtool/cpuacct_load/cpuacct_load.bpf.c | 68 ++++ .../detect/cgtool/cpuacct_load/cpuacct_load.c | 202 +++++++++++ .../detect/cgtool/cpuacct_load/cpuacct_load.h | 30 ++ .../tools/detect/cgtool/memcg_show/Makefile | 3 + .../detect/cgtool/memcg_show/memcg_show.sh | 152 +++++++++ .../tools/detect/cgtool/memcg_usage/Makefile | 9 + .../cgtool/memcg_usage/memcg_usage.bpf.c | 91 +++++ .../detect/cgtool/memcg_usage/memcg_usage.c | 321 ++++++++++++++++++ .../detect/cgtool/memcg_usage/memcg_usage.h | 28 ++ 20 files changed, 1728 insertions(+) create mode 100644 source/tools/detect/cgtool/Makefile create mode 100644 source/tools/detect/cgtool/README.md create mode 100644 source/tools/detect/cgtool/cgcheck/Makefile create mode 100644 source/tools/detect/cgtool/cgcheck/README.md create mode 100755 source/tools/detect/cgtool/cgcheck/cgcheck.py create mode 100644 source/tools/detect/cgtool/cgrun/Makefile create mode 100755 source/tools/detect/cgtool/cgrun/cgtool.sh create mode 100644 source/tools/detect/cgtool/cgtool_comm.h create mode 100644 source/tools/detect/cgtool/cgtoollib.h create mode 100644 source/tools/detect/cgtool/cgtoollib_bpf.h create mode 100644 
source/tools/detect/cgtool/cpuacct_load/Makefile create mode 100644 source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c create mode 100644 source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c create mode 100644 source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h create mode 100644 source/tools/detect/cgtool/memcg_show/Makefile create mode 100755 source/tools/detect/cgtool/memcg_show/memcg_show.sh create mode 100644 source/tools/detect/cgtool/memcg_usage/Makefile create mode 100644 source/tools/detect/cgtool/memcg_usage/memcg_usage.bpf.c create mode 100644 source/tools/detect/cgtool/memcg_usage/memcg_usage.c create mode 100644 source/tools/detect/cgtool/memcg_usage/memcg_usage.h diff --git a/source/tools/detect/cgtool/Makefile b/source/tools/detect/cgtool/Makefile new file mode 100644 index 00000000..15eeed40 --- /dev/null +++ b/source/tools/detect/cgtool/Makefile @@ -0,0 +1,3 @@ +target := cgtool + +include $(SRC)/mk/sub.mk diff --git a/source/tools/detect/cgtool/README.md b/source/tools/detect/cgtool/README.md new file mode 100644 index 00000000..6a9317ab --- /dev/null +++ b/source/tools/detect/cgtool/README.md @@ -0,0 +1,179 @@ +1. cgtool工具介绍 +================== + +cgtool(Cgroup Tool)是Cgroup子系统流程跟踪和问题诊断工具集。主要包括: + memcg_usage: 统计memcg子系统memory.usage_in_bytes中详细进程使用信息; + memcg_show: 监控各个memcg子系统中usage,rss,cache等数据; + cpuacct_load:统计cpuacct子系统中cpu的平均负载(avenrun); + cgcheck:系统cgroup健康检查工具; + +2. 代码目录 +================== + cgrun: cgtool执行脚本 + cgtool*.h:公共库文件 + 其它是以工具命名的目录 + +3. 编译 +================== +./configure --enable-libbpf --enable-target-cgtool +make + +4. 
工具使用说明 +================== + +4.1 注意事项 +无 + +4.2 cgtool工具 +4.2.1 命令说明 +sysak cgtool [options] [cgtool [cgtoolargs]] + options: -h, help information + -l, list all tools for cgroup + cgtool: + tool name for list + cgtoolargs: + args for the tool, -h get more + +4.2.2 举例 +#举例1:列出所支持的tools +#sysak cgtool -l +memcg_usage # Tracing memory usage of the memory cgroup +cpuacct_load # Tracing cpu load for the cpuacct cgroup + +#举例2: 使用memcg_usage工具 +见5.1 + +5. tools使用说明 +================== + +5.1 memcg_usage +5.1.1 命令说明 +sysak cgtool memcg_usage [OPTION...] + -b, --btf=BTF_PATH Specify path of the custom btf + -d, --debug Enable libbpf debug output + -f, --dir=dir cgroup dir + -t, --timeout=time time out + -?, --help Give this help list + --usage Give a short usage message + -V, --version Print program version + +5.1.2 举例 +#举例1:统计memcg目录1秒总体进程内存使用情况 +#sysak cgtool memcg_usage -t 1 +task number:4 cgroup dir:/sys/fs/cgroup/memory/ + PID TID COMM PGSIZE +----------------------------------- +29084 29084 systemd-cgroups 31 +29080 29080 systemd-cgroups 31 +29055 29055 systemd-cgroups 30 +1 1 systemd 144 + +task number:2 cgroup dir:/sys/fs/cgroup/memory/yagent_script + PID TID COMM PGSIZE +----------------------------------- +29025 29025 sh 151 +29089 29089 hostinfo 71 + +#举例2: 统计单一memcg组1秒进程内存使用情况 +#sysak cgtool memcg_usage -t 1 -f /sys/fs/cgroup/memory/user.slice +task number:5 cgroup dir:/sys/fs/cgroup/memory/user.slice + PID TID COMM PGSIZE +----------------------------------- +31085 31085 grep 49 +31087 31087 tail 28 +31084 31084 ps 121 +31083 31083 sh 85 +31086 31086 awk 48 + +5.2 cpuacct_load +5.2.1 命令说明 +sysak cgtool cpuacct_load [OPTION...] 
+ -b, --btf=BTF_PATH Specify path of the custom btf + -d, --debug Enable libbpf debug output + -f, --dir=dir cgroup dir + -t, --timeout=time time out + -?, --help Give this help list + --usage Give a short usage message + -V, --version Print program version + +5.2.2 举例 +#举例1:统计cpuacct目录60秒负载 +#sysak cgtool cpuacct_load -t 60 +cgroup dir:/sys/fs/cgroup/cpuacct/h2o +times: +avenrun0: 126 115 105 96 +avenrun1: 31 30 29 28 +avenrun2: 8 7 6 5 +load0: 0.06 0.06 0.05 0.05 +load1: 0.02 0.01 0.01 0.01 +load2: 0.00 0.00 0.00 0.00 + +cgroup dir:/sys/fs/cgroup/cpuacct/docker +times: +avenrun0: 0 0 0 0 +avenrun1: 37 36 35 34 +avenrun2: 24 23 22 21 +load0: 0.00 0.00 0.00 0.00 +load1: 0.02 0.02 0.02 0.02 +load2: 0.01 0.01 0.01 0.01 + +cgroup dir:/sys/fs/cgroup/cpuacct/docker/26f45842eb4304617e0d121a384a7a0fcb7c25e0420771424102dde7f3886a28 +times: +avenrun0: 0 0 0 0 +avenrun1: 37 36 35 34 +avenrun2: 24 23 22 21 +load0: 0.00 0.00 0.00 0.00 +load1: 0.02 0.02 0.02 0.02 +load2: 0.01 0.01 0.01 0.01 + +#举例2: 统计单一cpuacct组60秒负载 +#sysak cgtool cpuacct_load -t 60 -f /sys/fs/cgroup/cpuacct/h2o +cgroup dir:/sys/fs/cgroup/cpuacct/h2o +times: +avenrun0: 232 213 195 179 164 315 +avenrun1: 60 59 58 57 56 90 +avenrun2: 15 14 13 12 11 22 +load0: 0.11 0.10 0.10 0.09 0.08 0.15 +load1: 0.03 0.03 0.03 0.03 0.03 0.04 +load2: 0.01 0.01 0.01 0.01 0.01 0.01 + +5.3 memcg_show +5.3.1 命令说明 + sysak cgtool memcg_show [OPTION...] 
+ -h, help information + -i, detection time interval, default: 10s + -t, detection times, default: 5 times + -u, [B/KB/MB/G], default: MB + -d, memcg dir, default: /sys/fs/cgroup/memory + +Examples: + sysak cgtool memcg_show + sysak cgtool memcg_show -i 60 -t 10 -u G + +5.3.2 举例 +#举例1:每隔1s监控/sys/fs/cgroup/memory/agent/目录memory数据,监控5次后结束 +#sysak cgtool memcg_show -i 1 -t 5 -d /sys/fs/cgroup/memory/agent/ +============================================== +/sys/fs/cgroup/memory/agent//Argus +usage: 0 0 0 0 0 +rss: 40960 40960 40960 40960 40960 +cache: 0 0 0 0 0 +swap: 0 0 0 0 0 +cache+rss+swap: 40960 40960 40960 40960 40960 +kmemusage: 0 0 0 0 0 +memswusage: 0 0 0 0 0 + +============================================== +/sys/fs/cgroup/memory/agent//staragent +usage: 69574656 69574656 70127616 69578752 69582848 +rss: 13565952 13565952 14139392 13787136 13787136 +cache: 55734272 55734272 55734272 55734272 55734272 +swap: 0 0 0 0 0 +cache+rss+swap: 69300224 69300224 69873664 69521408 69521408 +kmemusage: 0 0 0 0 0 +memswusage: 69574656 69574656 70127616 69578752 69582848 + +每一列数据为1次监控取值。 + +5.4 cgcheck +详见cgcheck/README.md diff --git a/source/tools/detect/cgtool/cgcheck/Makefile b/source/tools/detect/cgtool/cgcheck/Makefile new file mode 100644 index 00000000..320688ca --- /dev/null +++ b/source/tools/detect/cgtool/cgcheck/Makefile @@ -0,0 +1,5 @@ +target = cgcheck + +mods += $(target) + +include $(SRC)/mk/py.mk diff --git a/source/tools/detect/cgtool/cgcheck/README.md b/source/tools/detect/cgtool/cgcheck/README.md new file mode 100644 index 00000000..44555144 --- /dev/null +++ b/source/tools/detect/cgtool/cgcheck/README.md @@ -0,0 +1,19 @@ +1. cgcheck工具介绍 +================== + +cgcheck(Cgroup Check)是对系统cgroup配置、内存使用状态等健康状态检查工具。 +主要包括: +1) 检查Cgroup子组是否过多(超过1000),可能造成系统卡顿; + +2. 代码目录 +================== + cgcheck.py: cgcheck执行脚本 + +3. 
使用说明
+==================
+sysak cgtool cgcheck
+
+#举例1:系统cgroup数量过多
+#sysak cgtool cgcheck
+cgroup子系统:memory 数量:1502 层级:4
+
diff --git a/source/tools/detect/cgtool/cgcheck/cgcheck.py b/source/tools/detect/cgtool/cgcheck/cgcheck.py
new file mode 100755
index 00000000..8ca41d6b
--- /dev/null
+++ b/source/tools/detect/cgtool/cgcheck/cgcheck.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# @Author: shiyan
+
+from subprocess import *
+import os, fcntl, re, sys
+from time import sleep
+import socket
+import time,datetime
+import json,base64,hashlib,re
+import threading
+import sched
+import importlib
+import json
+import argparse
+
+if sys.version[0] == '2':
+    reload(sys)
+    sys.setdefaultencoding('utf8')
+
+# A cgroup subsystem with this many cgroups, or a host with this many
+# containers, is reported as a problem.
+NUM_THRESHOLD = 1000
+
+def _run(cmd):
+    """Run cmd through the shell and return its stdout; the pipe is always closed."""
+    output = os.popen(cmd)
+    try:
+        return output.read()
+    finally:
+        output.close()
+
+def get_cmddata(sn, data, cmd, updated=0):
+    """Return the stdout of shell command cmd, cached in data[cmd].
+
+    The command is run when updated is true or nothing is cached yet; if
+    running it raises, the cached value is ''. sn is not used here.
+    """
+    if updated or cmd not in data:
+        data[cmd] = ''
+        try:
+            data[cmd] = _run(cmd)
+        except Exception:
+            print( 'get_cmddata exception!')
+            data[cmd] = ''
+    return data[cmd]
+
+def _find_container_cli():
+    """Return 'pouch' or 'docker', whichever `which` finds first, else ''."""
+    for name in ('pouch', 'docker'):
+        ret = _run('which %s 2>/dev/null' % name)
+        # an answer that mentions "which" is treated as "not found", as before
+        if len(ret) > 0 and ret.find('which') < 0:
+            return name
+    return ''
+
+def get_dockerids(sn, data, updated=0):
+    """Return the ids printed by `<cli> ps -q`, cached in data['dockerids'].
+
+    The CLI name (pouch or docker) is detected once and cached in
+    data['dockercmd']; without a CLI, or on error, the list is empty.
+    sn is not used here.
+    """
+    if updated or 'dockerids' not in data:
+        data['dockerids'] = []
+        if 'dockercmd' not in data:
+            data['dockercmd'] = _find_container_cli()
+        if len(data['dockercmd']) == 0:
+            return data['dockerids']
+        try:
+            cmd = "%s ps -q | awk '{print $1}'"%(data['dockercmd'])
+            data['dockerids'] = _run(cmd).split()
+        except Exception:
+            print( 'get_dockerids exception!')
+            data['dockerids'] = []
+    return data['dockerids']
+
+def num_check():
+    """Return a text summary of cgroup-count and container-count problems.
+
+    Reports every /proc/cgroups subsystem with at least NUM_THRESHOLD cgroups,
+    the solutions of matching rule modules found under ../rules, and hosts
+    with at least NUM_THRESHOLD containers. The summary is '' when nothing
+    is wrong; on an unexpected error the part built so far is returned.
+    """
+    sn = ''
+    data = {}
+    summary = ''
+    rets = {}
+    rets['issue_result'] = {}
+    all_matched = {}
+    # CGROUP_NUM_ISSUE (names of the rule files to run) is not defined in this
+    # file; treat it as empty when absent instead of failing with NameError.
+    rule_files = globals().get('CGROUP_NUM_ISSUE', ())
+
+    try:
+        cgroup_msg = {}
+        cmd = "cat /proc/cgroups 2>/dev/null"
+        for line in get_cmddata(sn, data, cmd).splitlines():
+            item = line.split()
+            # skip the header line and anything too short to parse
+            if 'num_cgroups' in line or len(item) < 3:
+                continue
+            if int(item[2]) >= NUM_THRESHOLD:
+                cgroup_msg[item[0]] = {}
+                cgroup_msg[item[0]]['hierarchy'] = int(item[1])
+                cgroup_msg[item[0]]['num_cgroups'] = int(item[2])
+
+        if len(cgroup_msg) > 0:
+            for item in cgroup_msg:
+                summary += "cgroup子系统:%s 数量:%s 层级:%s\n"%(item,cgroup_msg[item]['num_cgroups'],cgroup_msg[item]['hierarchy'])
+            for subdir, dirs, files in os.walk("%s/../rules"%(os.path.dirname(os.path.abspath(__file__)))):
+                for file in files:
+                    filepath = subdir + os.sep + file
+                    if not os.path.isfile(filepath):
+                        continue
+                    if not (file.endswith('.py') and file in rule_files):
+                        continue
+                    rule_mod = file[:-3]
+                    try:
+                        mod = importlib.import_module(rule_mod)
+                        ret = mod.query(sn, data)
+                        rets['issue_result'][rule_mod] = ret
+                        if ret['return']:
+                            all_matched.setdefault('online', {}).setdefault(file, []).append(ret['solution'])
+                    except Exception as e:
+                        # name the rule by rule_mod: mod is unbound if the import failed
+                        print( '%s Exception!'%(rule_mod),e)
+            for i in all_matched:
+                summary += "%s\n"%(json.dumps(all_matched[i],ensure_ascii=False))
+
+        dockerids = get_dockerids(sn, data)
+        if len(dockerids) >= NUM_THRESHOLD:
+            summary += "该主机容器数量过多:%s\n"%len(dockerids)
+    except Exception:
+        print( "num_check failed!")
+    return summary
+
+def main():
+    print( num_check())
+
+if __name__ == "__main__":
+    main()
diff --git a/source/tools/detect/cgtool/cgrun/Makefile b/source/tools/detect/cgtool/cgrun/Makefile
new file mode 100644
index 00000000..0407c5a4
--- /dev/null
+++ b/source/tools/detect/cgtool/cgrun/Makefile
@@ -0,0 +1,3 @@
+target = cgtool
+
+include $(SRC)/mk/sh.mk
diff --git a/source/tools/detect/cgtool/cgrun/cgtool.sh 
b/source/tools/detect/cgtool/cgrun/cgtool.sh
new file mode 100755
index 00000000..15cf970f
--- /dev/null
+++ b/source/tools/detect/cgtool/cgrun/cgtool.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+#****************************************************************#
+# ScriptName: cgtool.sh
+# Author: Bixuan Cui
+# Create Date: 2022-01-07
+# Function: dispatch "sysak cgtool <tool> [args]" to the selected cgroup tool
+#***************************************************************#
+if [ -n "$SYSAK_WORK_PATH" ]; then
+	WORK_PATH=$SYSAK_WORK_PATH
+else
+	WORK_PATH=/usr/local/sbin/.sysak_compoents
+fi
+
+MEMCGUSAGE_BIN=$WORK_PATH/tools/memcg_usage
+MEMCGSHOW_BIN=$WORK_PATH/tools/memcg_show
+CPUACCTLOAD_BIN=$WORK_PATH/tools/cpuacct_load
+CGCHECK_BIN=$WORK_PATH/tools/cgcheck
+
+# Print the command-line help.
+usage()
+{
+	echo "cgtool: tools for analyzing cgroups"
+	echo "Usage:"
+	echo " sysak cgtool [options] [cgtool [cgtoolargs]]"
+	echo " options: -h, help information"
+	echo " -l, list all tools for cgroup"
+	echo " cgtool:"
+	echo " tool name for list"
+	echo " cgtoolargs:"
+	echo " args for the tool, -h get more"
+	echo "Examples:"
+	echo " sysak cgtool -l"
+	echo " sysak cgtool memcg_usage -h"
+}
+
+# Print the tools this wrapper can start, one per line.
+trace_list()
+{
+	echo "memcg_usage # tracing memory usage of the memory cgroup"
+	echo "memcg_show # statistics of usage,rss,cache... of each memcg"
+	echo "cpuacct_load # tracing cpu load for the cpuacct cgroup"
+	echo "cgcheck # cgroup check in the system "
+}
+
+while getopts 'hl' OPT; do
+	case $OPT in
+		"h")
+			usage
+			exit 0
+			;;
+		"l")
+			trace_list
+			exit 0
+			;;
+		*)
+			# unknown option: getopts has already printed the error
+			usage
+			exit 1
+			;;
+	esac
+done
+shift $((OPTIND - 1))
+
+# a tool name is required
+if [ $# -eq 0 ]; then
+	usage
+	exit 1
+fi
+cgtool=$1
+shift
+
+# "$@" hands the remaining arguments to the tool unchanged, even when they
+# contain spaces; the tool's exit status becomes this script's exit status.
+case "$cgtool" in
+	memcg_usage)
+		"$MEMCGUSAGE_BIN" "$@"
+		;;
+	memcg_show)
+		"$MEMCGSHOW_BIN" "$@"
+		;;
+	cpuacct_load)
+		"$CPUACCTLOAD_BIN" "$@"
+		;;
+	cgcheck)
+		"$CGCHECK_BIN" "$@"
+		;;
+	*)
+		echo "not support tool:${cgtool}"
+		exit 1
+		;;
+esac
diff --git a/source/tools/detect/cgtool/cgtool_comm.h b/source/tools/detect/cgtool/cgtool_comm.h
new file mode 100644
index 00000000..07af7325
--- /dev/null
+++ b/source/tools/detect/cgtool/cgtool_comm.h
@@ -0,0 +1,296 @@
+#ifndef __CGTRACE_COMM_H
+#define __CGTRACE_COMM_H
+
+typedef u32 uint32_t;
+typedef u64 uint64_t;
+typedef u16 uint16_t;
+typedef u8 uint8_t;
+
+struct mm_struct___MEMCG {
+	struct {
+		struct vm_area_struct *mmap;
+		struct rb_root mm_rb;
+		u64 vmacache_seqnum;
+		long unsigned int (*get_unmapped_area)(struct file *, long unsigned int, long unsigned int, long unsigned int, long unsigned int);
+		long unsigned int mmap_base;
+		long unsigned int mmap_legacy_base;
+		long unsigned int mmap_compat_base;
+		long unsigned int mmap_compat_legacy_base;
+		long unsigned int task_size;
+		long unsigned int highest_vm_end;
+		pgd_t *pgd;
+		atomic_t mm_users;
+		atomic_t mm_count;
+		atomic_long_t pgtables_bytes;
+		int map_count;
+		spinlock_t page_table_lock;
+		struct rw_semaphore mmap_sem;
+		struct list_head mmlist;
+		long unsigned int hiwater_rss;
+		long unsigned int hiwater_vm;
+		long unsigned int total_vm;
+		long unsigned int locked_vm;
+		long unsigned int pinned_vm;
+		long unsigned int data_vm;
+		long unsigned int exec_vm;
+		long unsigned int stack_vm;
+		long unsigned int def_flags;
+		spinlock_t arg_lock;
+		long unsigned int start_code;
+		long unsigned int end_code;
+		long unsigned int start_data;
+		long unsigned int end_data;
+		long unsigned int start_brk;
+		long unsigned int brk;
+		long unsigned int start_stack;
+		long unsigned int arg_start;
+		long unsigned int arg_end;
+		long unsigned int env_start;
+		long unsigned int env_end;
+		long unsigned int saved_auxv[46];
+		struct mm_rss_stat rss_stat;
+		struct linux_binfmt *binfmt;
+		mm_context_t context;
+		long unsigned int flags;
+		struct core_state *core_state;
+		atomic_t membarrier_state;
+		spinlock_t ioctx_lock;
+		struct kioctx_table *ioctx_table;
+		struct task_struct *owner;
+		struct user_namespace *user_ns;
+		struct file *exe_file;
+		struct mmu_notifier_mm *mmu_notifier_mm;
+		long unsigned int numa_next_scan;
+		long unsigned int numa_scan_offset;
+		int numa_scan_seq;
+		atomic_t tlb_flush_pending;
+		bool tlb_flush_batched;
+		struct uprobes_state uprobes_state;
+		atomic_long_t hugetlb_usage;
+		struct work_struct async_put_work;
+	};
+	long unsigned int ali_reserved1;
+	long unsigned int ali_reserved2;
+	long unsigned int cpu_bitmap[0];
+}; /* end of struct mm_struct___MEMCG */
+
+struct page_counter { /* type of the memory, swap, memsw, kmem and tcpmem members of struct mem_cgroup */
+	atomic_long_t usage;
+	long unsigned int min;
+	long unsigned int low;
+	long unsigned int max;
+	struct page_counter *parent;
+	long unsigned int emin;
+	atomic_long_t min_usage;
+	atomic_long_t children_min_usage;
+	long unsigned int elow;
+	atomic_long_t low_usage;
+	atomic_long_t children_low_usage;
+	long unsigned int wmark_low;
+	long unsigned int wmark_high;
+	long unsigned int watermark;
+	long unsigned int failcnt;
+};
+
+struct vmpressure { /* type of the vmpressure member of struct mem_cgroup */
+	long unsigned int scanned;
+	long unsigned int reclaimed;
+	long unsigned int tree_scanned;
+	long unsigned int tree_reclaimed;
+	struct spinlock sr_lock;
+	struct list_head events;
+	struct mutex events_lock;
+	struct work_struct work;
+};
+
+struct mem_cgroup_thresholds { /* type of the thresholds and memsw_thresholds members of struct mem_cgroup */
+	struct mem_cgroup_threshold_ary *primary;
+	struct mem_cgroup_threshold_ary *spare;
+};
+
+struct memcg_padding { /* zero-length member; used for _pad1_ and _pad2_ in struct mem_cgroup */
+	char x[0];
+};
+
+enum memcg_kmem_state { /* type of the kmem_state member of struct mem_cgroup */
+	KMEM_NONE = 0,
+	KMEM_ALLOCATED = 1,
+	KMEM_ONLINE = 2,
+};
+
+struct mem_cgroup_id { /* type of the id member of struct mem_cgroup */
+	int id;
+	atomic_t ref;
+};
+
+struct deferred_split { /* type of the deferred_split_queue member of struct mem_cgroup */
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	long unsigned int split_queue_len;
+};
+
+union kernfs_node_id { /* 64-bit id that can also be viewed as the pair {ino, generation}; get_knid_by_cgroup() reads it */
+	struct {
+		u32 ino;
+		u32 generation;
+	};
+	u64 id;
+};
+
+struct mem_cgroup { /* NOTE(review): no code visible in this part of the patch reads this struct; presumably used by memcg_usage - confirm */
+	struct cgroup_subsys_state css;
+	struct mem_cgroup_id id;
+	struct page_counter memory;
+	struct page_counter swap;
+	struct page_counter memsw;
+	struct page_counter kmem;
+	struct page_counter tcpmem;
+	long unsigned int high;
+	struct work_struct high_work;
+	long unsigned int soft_limit;
+	struct vmpressure vmpressure;
+	bool use_hierarchy;
+	bool oom_group;
+	bool oom_lock;
+	int under_oom;
+	int swappiness;
+	int oom_kill_disable;
+	struct cgroup_file events_file;
+	struct cgroup_file swap_events_file;
+	struct mutex thresholds_lock;
+	struct mem_cgroup_thresholds thresholds;
+	struct mem_cgroup_thresholds memsw_thresholds;
+	struct list_head oom_notify;
+	long unsigned int move_charge_at_immigrate;
+	spinlock_t move_lock;
+	long unsigned int move_lock_flags;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	struct memcg_padding _pad1_;
+	atomic_t moving_account;
+	struct task_struct *move_lock_task;
+	struct mem_cgroup_stat_cpu *stat_cpu;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	struct memcg_padding _pad2_;
+	atomic_long_t stat[35];
+	atomic_long_t events[84];
+	atomic_long_t memory_events[7];
+	long unsigned int socket_pressure;
+	bool tcpmem_active;
+	int tcpmem_pressure;
+	unsigned int wmark_ratio;
+	struct work_struct wmark_work;
+	unsigned int wmark_scale_factor;
+	int kmemcg_id;
+	enum memcg_kmem_state kmem_state;
+	struct list_head kmem_caches;
+	int last_scanned_node;
+	nodemask_t scan_nodes;
+	atomic_t numainfo_events;
+	atomic_t numainfo_updating;
+	struct list_head cgwb_list;
+	struct wb_domain cgwb_domain;
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+	struct deferred_split deferred_split_queue;
+	long unsigned int ali_reserved1; /* NOTE(review): the ali_reserved* members look specific to one kernel build - confirm the target kernel */
+	long unsigned int ali_reserved2;
+	long unsigned int ali_reserved3;
+	long unsigned int ali_reserved4;
+	struct mem_cgroup_per_node *nodeinfo[0];
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+	long: 64;
+};
+
+struct cgroup___MEMCG { /* get_knid_by_cgroup() reads ->kn and kprobe_cpuacct_calc_load() reads ->id through this type */
+	struct cgroup_subsys_state self;
+	long unsigned int flags;
+	int id;
+	int level;
+	int max_depth;
+	int nr_descendants;
+	int nr_dying_descendants;
+	int max_descendants;
+	int nr_populated_csets;
+	int nr_populated_domain_children;
+	int nr_populated_threaded_children;
+	int nr_threaded_children;
+	struct kernfs_node *kn;
+	struct cgroup_file procs_file;
+	struct cgroup_file events_file;
+	u16 subtree_control;
+	u16 subtree_ss_mask;
+	u16 old_subtree_control;
+	u16 old_subtree_ss_mask;
+	struct cgroup_subsys_state *subsys[13];
+	struct cgroup_root *root;
+	struct list_head cset_links;
+	struct list_head e_csets[13];
+	struct cgroup *dom_cgrp;
+	struct cgroup *old_dom_cgrp;
+	struct cgroup_rstat_cpu *rstat_cpu;
+	struct list_head rstat_css_list;
+	struct cgroup_base_stat pending_bstat;
+	struct cgroup_base_stat bstat;
+	struct prev_cputime prev_cputime;
+	struct list_head pidlists;
+	struct mutex pidlist_mutex;
+	wait_queue_head_t offline_waitq;
+	struct work_struct release_agent_work;
+	struct psi_group psi;
+	struct cgroup_bpf bpf;
+	atomic_t congestion_count;
+	int ancestor_ids[0];
+};
+
+struct kernfs_node___419 { /* get_knid_by_cgroup() reads ->id through this type */
+	atomic_t count;
+	atomic_t active;
+	struct kernfs_node *parent;
+	const char *name;
+	struct rb_node rb;
+	const void *ns;
+	unsigned int hash;
+	union {
+		struct kernfs_elem_dir dir;
+		struct kernfs_elem_symlink symlink;
+		struct kernfs_elem_attr attr;
+	};
+	void *priv;
+	union kernfs_node_id id;
+	short unsigned int flags;
+	umode_t mode;
+	struct kernfs_iattrs *iattr;
+};
+
+struct cpuacct___AVE { /* kprobe_cpuacct_calc_load() casts its first probe argument to this type and reads css.cgroup and avenrun[0..2] */
+	struct 
cgroup_subsys_state css;
+	struct cpuacct_usage *cpuusage;
+	struct cpuacct_alistats *alistats;
+	struct kernel_cpustat *cpustat;
+	long unsigned int avenrun[3];
+	long unsigned int avenrun_r[3];
+	struct list_head sli_list;
+	bool sli_enabled;
+	struct sched_cgroup_lat_stat_cpu *lat_stat_cpu;
+	u64 next_load_update;
+	long unsigned int ali_reserved1;
+	long unsigned int ali_reserved2;
+	long unsigned int ali_reserved3;
+	long unsigned int ali_reserved4;
+};
+
+#endif
diff --git a/source/tools/detect/cgtool/cgtoollib.h b/source/tools/detect/cgtool/cgtoollib.h
new file mode 100644
index 00000000..fa76e115
--- /dev/null
+++ b/source/tools/detect/cgtool/cgtoollib.h
@@ -0,0 +1,123 @@
+#ifndef __CGTRACELIB_H
+#define __CGTRACELIB_H
+
+/*
+ * NOTE(review): the header names on the original #include lines were lost
+ * when this patch text was extracted; the list below is what the code in
+ * this file uses.
+ */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+
+#define CMD_LEN 4096
+#define SYM_LEN 1024
+
+/*
+ * Look up the directory below /sys/fs/cgroup/<sub>/ whose inode number is
+ * kn_id and copy its path into buf (at most size bytes).
+ *
+ * The path is the first line printed by find(1), including the trailing
+ * newline that fgets() keeps.
+ *
+ * Returns 0 on success; -1 on bad arguments, if the command does not fit in
+ * CMD_LEN or cannot be started, or if find printed nothing.
+ */
+static int get_dir_by_knid(unsigned int kn_id, char *sub, char *buf, unsigned int size)
+{
+	FILE *fp;
+	char cmd[CMD_LEN];
+	int len;
+	int found;
+
+	if (sub == NULL || buf == NULL || size == 0)
+		return -1;
+
+	len = snprintf(cmd, sizeof(cmd), "find /sys/fs/cgroup/%s/ -inum %u", sub, kn_id);
+	if (len < 0 || (size_t)len >= sizeof(cmd))
+		return -1;
+
+	fp = popen(cmd, "r");
+	if (fp == NULL)
+		return -1;
+
+	/* fgets() leaves buf untouched when there is no output */
+	found = fgets(buf, (int)size, fp) != NULL;
+	pclose(fp);
+
+	return found ? 0 : -1;
+}
+
+/*
+ * Return the inode number of dir, truncated to unsigned int, or 0 when dir is
+ * NULL or stat() fails.
+ */
+static unsigned int get_knid_by_dir(char *dir)
+{
+	struct stat st;
+
+	if (dir == NULL || stat(dir, &st) != 0)
+		return 0;
+
+	return (unsigned int)st.st_ino;
+}
+
+/* Raise RLIMIT_MEMLOCK to unlimited; prints an error and exits with status 1 on failure. */
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+/*
+ * Tell whether the kernel symbol that follows the "kprobe_" prefix in name
+ * (e.g. "kprobe_foo" -> "foo") is listed in /proc/kallsyms.
+ *
+ * Returns false when name is NULL or has no such prefix, when
+ * /proc/kallsyms cannot be opened, or when no symbol has exactly that name.
+ */
+static bool find_ksym_by_name(const char *name)
+{
+	static const char prefix[] = "kprobe_";
+	const char *want;
+	char line[SYM_LEN];
+	char sym[SYM_LEN];
+	bool found = false;
+	FILE *fp;
+
+	if (name == NULL)
+		return false;
+	want = strstr(name, prefix);
+	if (want == NULL)
+		return false;
+	want += sizeof(prefix) - 1;
+	if (*want == '\0')
+		return false;
+
+	fp = fopen("/proc/kallsyms", "r");
+	if (fp == NULL)
+		return false;
+
+	while (fgets(line, sizeof(line), fp) != NULL) {
+		/* each line is "<address> <type> <symbol> [module]"; 1023 == SYM_LEN - 1 */
+		if (sscanf(line, "%*s %*c %1023s", sym) == 1 && strcmp(sym, want) == 0) {
+			found = true;
+			break;
+		}
+	}
+	fclose(fp);
+
+	return found;
+}
+
+#endif
diff --git 
a/source/tools/detect/cgtool/cgtoollib_bpf.h b/source/tools/detect/cgtool/cgtoollib_bpf.h
new file mode 100644
index 00000000..80e553b1
--- /dev/null
+++ b/source/tools/detect/cgtool/cgtoollib_bpf.h
@@ -0,0 +1,44 @@
+#ifndef __CGTRACELIB_BPF_H
+#define __CGTRACELIB_BPF_H
+
+/*
+ * NOTE(review): the header names on the five #include lines below were lost
+ * when this patch text was extracted; restore them from the original commit.
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include "cgtool_comm.h"
+
+#define BPF_ANY 0
+#define NULL ((void*)0)
+
+/*
+ * Return the inode number (the `ino` half of union kernfs_node_id) of the
+ * kernfs node behind cgrp, or 0 if either kernel read fails.
+ *
+ * User space matches this value against the st_ino of a cgroup directory
+ * and against `find -inum`.
+ */
+static u64 get_knid_by_cgroup(struct cgroup___MEMCG *cgrp)
+{
+	struct kernfs_node___419 *node;
+	union kernfs_node_id id;
+
+	if (bpf_core_read(&node, sizeof(node), &cgrp->kn))
+		return 0;
+	if (bpf_core_read(&id, sizeof(id), &node->id))
+		return 0;
+
+	/*
+	 * id is already a local copy, so no further bpf_core_read() is needed.
+	 * The old code copied sizeof(u64) bytes into a 4-byte unsigned int,
+	 * writing past the end of that variable.
+	 */
+	return id.ino;
+}
+
+#endif
diff --git a/source/tools/detect/cgtool/cpuacct_load/Makefile b/source/tools/detect/cgtool/cpuacct_load/Makefile
new file mode 100644
index 00000000..1f2ff3d4
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = cpuacct_load
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c
new file mode 100644
index 00000000..91f13a83
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.bpf.c
@@ -0,0 +1,75 @@
+#include "../cgtoollib_bpf.h"
+#include "../cgtool_comm.h"
+#include "cpuacct_load.h"
+
+/* Per-cgroup sample history, keyed by the cgroup's id. */
+struct bpf_map_def SEC("maps") cpuacct_load_hash_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct cpuacct_load),
+	.max_entries = 256,
+};
+
+/*
+ * On every cpuacct_calc_load() call, append the three avenrun values of the
+ * cpuacct passed as first argument to that cgroup's ring buffer of
+ * AVENRUN_MAX samples.
+ */
+SEC("kprobe/cpuacct_calc_load")
+int kprobe_cpuacct_calc_load(struct pt_regs *ctx)
+{
+	struct cpuacct___AVE *acct = (struct cpuacct___AVE*)PT_REGS_PARM1(ctx);
+	int key=0;
+	struct cpuacct_load load = {0};
+	struct cpuacct_load *load_up;
+	struct cgroup___MEMCG *cgrp;
+	unsigned int index = 0;
+	unsigned int avenrun_n = 0;
+
+	if (bpf_core_read(&cgrp, sizeof(struct cgroup___MEMCG *), &acct->css.cgroup))
+		return 0;
+	if (bpf_core_read(&key, sizeof(int), &cgrp->id))
+		return 0;
+
+	load_up = bpf_map_lookup_elem(&cpuacct_load_hash_map, &key);
+	if (load_up != NULL) {
+		/* known cgroup: store the sample in the next ring-buffer slot */
+		index = load_up->avenrun_index;
+		avenrun_n = load_up->avenrun_n;
+
+		if (index >= AVENRUN_MAX)
+			return 0;
+
+		if (bpf_core_read(&load_up->run[index][0], sizeof(unsigned long), &acct->avenrun[0]))
+			return 0;
+		if (bpf_core_read(&load_up->run[index][1], sizeof(unsigned long), &acct->avenrun[1]))
+			return 0;
+		if (bpf_core_read(&load_up->run[index][2], sizeof(unsigned long), &acct->avenrun[2]))
+			return 0;
+
+		load_up->avenrun_index = (index + 1) % AVENRUN_MAX;
+		load_up->avenrun_n = avenrun_n + 1 > AVENRUN_MAX ? AVENRUN_MAX : avenrun_n + 1;
+	} else {
+		/* first sample of this cgroup: build a new entry (index is 0 here) */
+		if (bpf_core_read(&load.run[index][0], sizeof(unsigned long), &acct->avenrun[0]))
+			return 0;
+		if (bpf_core_read(&load.run[index][1], sizeof(unsigned long), &acct->avenrun[1]))
+			return 0;
+		if (bpf_core_read(&load.run[index][2], sizeof(unsigned long), &acct->avenrun[2]))
+			return 0;
+
+		load.avenrun_index = (index + 1) % AVENRUN_MAX;
+		load.avenrun_n = avenrun_n + 1 > AVENRUN_MAX ? AVENRUN_MAX : avenrun_n + 1;
+
+		/* a cgroup whose directory inode cannot be read is not recorded */
+		load.knid = get_knid_by_cgroup(cgrp);
+		if (load.knid == 0)
+			return 0;
+
+		bpf_map_update_elem(&cpuacct_load_hash_map, &key, &load, BPF_ANY);
+	}
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c
new file mode 100644
index 00000000..4deeb27a
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.c
@@ -0,0 +1,245 @@
+/*
+ * NOTE(review): the header names on the five #include lines below were lost
+ * when this patch text was extracted; restore them from the original commit.
+ * The named system headers after them are what the code below relies on.
+ */
+#include
+#include
+#include
+#include
+#include
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "cpuacct_load.h"
+#include "cpuacct_load.skel.h"
+#include "../cgtoollib.h"
+
+/* knid value that selects every cgroup */
+#define KNID_ANY ((unsigned int)-1)
+
+const char *argp_program_version = "cgtrace cpuacct_load 1.0";
+static const char argp_program_doc[] =
+	"\n Tracing cpu load for the cpuacct cgroup\n"
+	;
+
+static const struct argp_option cpuacct_load_options[] = {
+	{"timeout", 't', "timeout", 0, "time out"},
+	{"dir", 'f', "dir", 0, "cgroup dir"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+/* Command-line settings; timeout 0 means "run until interrupted". */
+static struct env
+{
+	int timeout;
+	char *dir;
+	bool debug;
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct cpuacct_load_bpf *obj;
+
+/* Set by the signal handler; main() prints the report once it is non-zero. */
+static volatile sig_atomic_t stop_requested;
+
+/* libbpf log callback: forward messages to stderr only with --debug. */
+static int libbpf_print_fn(enum libbpf_print_level level,
+		const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+/* argp callback: store one parsed option in env. */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	char *end;
+	long val;
+
+	switch (key)
+	{
+	case 't':
+		errno = 0;
+		val = strtol(arg, &end, 10);
+		if (end == arg || *end != '\0' || errno == ERANGE || val < 0 || val != (int)val)
+			argp_error(state, "invalid timeout: %s", arg);
+		env.timeout = (int)val;
+		break;
+	case 'f':
+		env.dir = arg;
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/* Print the avenrun and load rows of one map entry, oldest sample first. */
+static void print_samples(const struct cpuacct_load *load)
+{
+	/* once the ring buffer is full, the oldest sample sits at avenrun_index */
+	unsigned int first = load->avenrun_n >= AVENRUN_MAX ? load->avenrun_index % AVENRUN_MAX : 0;
+	unsigned int n = load->avenrun_n > AVENRUN_MAX ? AVENRUN_MAX : load->avenrun_n;
+	unsigned int i, j;
+	int loop;
+
+	for (loop = 0; loop < 3; loop++) {
+		printf("avenrun%d:", loop);
+		for (i = 0, j = first; i < n; i++, j = (j + 1) % AVENRUN_MAX)
+			printf(" %lu", load->run[j][loop]);
+		printf("\n");
+	}
+
+	for (loop = 0; loop < 3; loop++) {
+		printf("load%d:", loop);
+		for (i = 0, j = first; i < n; i++, j = (j + 1) % AVENRUN_MAX)
+			printf(" %lu.%02lu", cal_load_int(load->run[j][loop]), cal_load_frac(load->run[j][loop]));
+		printf("\n");
+	}
+	printf("\n");
+}
+
+/*
+ * Print and remove every map entry whose knid matches (all entries when
+ * knid is KNID_ANY); print hints when nothing matched.
+ */
+static void print_cpuacct_load(struct cpuacct_load_bpf *obj, unsigned int knid)
+{
+	int fd = bpf_map__fd(obj->maps.cpuacct_load_hash_map);
+	int key;	/* same size as the map's key_size, sizeof(int) */
+	struct cpuacct_load load;
+	bool find = false;
+	char dir[PATH_MAX];
+
+	/* each visited entry is deleted, so always ask for the first key */
+	while (bpf_map_get_next_key(fd, NULL, &key) == 0) {
+		if (bpf_map_lookup_elem(fd, &key, &load) == 0 &&
+		    (knid == KNID_ANY || load.knid == knid)) {
+			find = true;
+			if (get_dir_by_knid(load.knid, "cpuacct", dir, sizeof(dir)) < 0) {
+				printf("can't get cgroup dir by knid:%u\n", load.knid);
+			} else {
+				/* dir ends with the newline read from find(1) */
+				printf("cgroup dir:%s", dir);
+				printf("times:\n");
+				print_samples(&load);
+			}
+		}
+		/* stop instead of looping forever if the entry cannot be removed */
+		if (bpf_map_delete_elem(fd, &key) != 0)
+			break;
+	}
+
+	if (find == false) {
+		printf("can't get load trace, maybe should do:\n");
+		printf("1) echo 1 > /proc/async_load_calc\n");
+		printf("2) echo 1 > cpuacct.enable_sli\n");
+		printf("3) check whether the [dir] parameter is correct\n");
+	}
+}
+
+/*
+ * SIGINT/SIGALRM handler. It only records the request: printf() and popen()
+ * are not async-signal-safe, so the report is printed from main().
+ */
+static void alarm_stop(int signo)
+{
+	(void)signo;
+	stop_requested = 1;
+}
+
+/*
+ * Load and attach the BPF program, collect samples until the timeout expires
+ * or SIGINT arrives, then print the report. Returns 0 on success.
+ */
+int main(int argc, char **argv)
+{
+	int err;
+	unsigned int knid = KNID_ANY;
+	sigset_t block, old;
+	struct bpf_program *prog;
+	static const struct argp argp = {
+		.options = cpuacct_load_options,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+		.args_doc = NULL,
+	};
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	obj = cpuacct_load_bpf__open();
+	if (!obj)
+	{
+		printf("failed to open BPF object\n");
+		return 1;
+	}
+
+	bpf_object__for_each_program(prog, obj->obj) {
+		if (!find_ksym_by_name(bpf_program__name(prog))) {
+			printf("not find %s in /proc/kallsyms, not support\n", bpf_program__section_name(prog));
+			err = -ENOTSUP;
+			goto cleanup;
+		}
+	}
+
+	err = cpuacct_load_bpf__load(obj);
+	if (err)
+	{
+		printf("failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = cpuacct_load_bpf__attach(obj);
+	if (err)
+	{
+		printf("failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	/*
+	 * Keep both signals blocked except inside sigsuspend(), so neither can
+	 * arrive between the flag test and the wait.
+	 */
+	sigemptyset(&block);
+	sigaddset(&block, SIGINT);
+	sigaddset(&block, SIGALRM);
+	sigprocmask(SIG_BLOCK, &block, &old);
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+
+	if (env.timeout > 0)
+		alarm(env.timeout);
+	while (!stop_requested)
+		sigsuspend(&old);
+	sigprocmask(SIG_SETMASK, &old, NULL);
+
+	if (env.dir != NULL)
+		knid = get_knid_by_dir(env.dir);
+	print_cpuacct_load(obj, knid);
+
+cleanup:
+	cpuacct_load_bpf__destroy(obj);
+	return err != 0;
+}
diff --git a/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h
new file mode 100644
index 00000000..b871c3c8
--- /dev/null
+++ b/source/tools/detect/cgtool/cpuacct_load/cpuacct_load.h
@@ -0,0 +1,30 @@
+#ifndef __CGTRACE_H
+#define __CGTRACE_H
+
+#define AVENRUN_MAX 10
+
+struct cpuacct_load {
+	unsigned long run[AVENRUN_MAX][3];
+	unsigned int avenrun_index;
+	unsigned int avenrun_n;
+	unsigned int knid;
+};
+
+/* cal load from kernel */
+#define FSHIFT 11 /* nr of bits of precision */
+#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static unsigned long cal_load_int(unsigned long avenrun)
+{
+	unsigned long load_tmp = (avenrun + FIXED_1/200) << 0;
+	return 
LOAD_INT(load_tmp); +} + +static unsigned long cal_load_frac(unsigned long avenrun) +{ + unsigned long load_tmp = (avenrun + FIXED_1/200) << 0; + return LOAD_FRAC(load_tmp); +} +#endif diff --git a/source/tools/detect/cgtool/memcg_show/Makefile b/source/tools/detect/cgtool/memcg_show/Makefile new file mode 100644 index 00000000..975632b0 --- /dev/null +++ b/source/tools/detect/cgtool/memcg_show/Makefile @@ -0,0 +1,3 @@ +target = memcg_show + +include $(SRC)/mk/sh.mk diff --git a/source/tools/detect/cgtool/memcg_show/memcg_show.sh b/source/tools/detect/cgtool/memcg_show/memcg_show.sh new file mode 100755 index 00000000..e06ffc66 --- /dev/null +++ b/source/tools/detect/cgtool/memcg_show/memcg_show.sh @@ -0,0 +1,152 @@ +#!/bin/bash + +memcg_dir="/sys/fs/cgroup/memory" +interval=10 +times=5 +unit=1 +tmp_dir="memcgshow_tmp" + +# arguments +usage() +{ + echo "memcg_show: statistics of usage,rss,cache... of each memcg" + echo "Usage:" + echo " sysak cgtool memcg_show [OPTION...] " + echo " -h, help information" + echo " -i, detection time interval, default: ${interval}s" + echo " -t, detection times, default: ${times} times" + echo " -u, [B/KB/MB/G], default: MB" + echo " -d, memcg dir, default: $memcg_dir" + echo " " + echo "Examples:" + echo " sysak cgtool memcg_show" + echo " sysak cgtool memcg_show -i 60 -t 10 -u G" +} + +memstat() +{ + local stat_dir=$1 + local result_file=$2 + + local usage=`cat $stat_dir/memory.usage_in_bytes` + local memswusage=`cat $stat_dir/memory.memsw.usage_in_bytes` + local kmemusage=`cat $stat_dir/memory.kmem.usage_in_bytes` + local total_cache=`cat $stat_dir/memory.stat |grep total_cache |awk -F" " '{print $2}'` + local total_rss=`cat $stat_dir/memory.stat |grep total_rss |sed -n 1p |awk -F" " '{print $2}'` + local total_swap=`cat $stat_dir/memory.stat |grep total_swap |sed -n 1p |awk -F" " '{print $2}'` + local real_use=$(($total_cache + $total_rss + $total_swap)) + + + # memory.usage_in_bytes = cache + rss + # real_use = cache + rss + 
swap + # memory.memsw.usage_in_bytes = memory.usage_in_bytes + swap + echo "$stat_dir $usage $total_rss $total_cache $total_swap $real_use $kmemusage $memswusage" >> $result_file +} + +memstat_all() +{ + local dir_n=0 + local stat_dir=$1 + local result_file=$2 + + for file in `ls $stat_dir` + do + if [ -d "$stat_dir/$file" ]; then + memstat_all $stat_dir/$file $result_file + dir_n=$(($dir_n + 1)) + fi + done + + if [ $dir_n -eq 0 ]; then + memstat $stat_dir $result_file + fi +} + +output() +{ + lines=`cat ${tmp_dir}/0.result | wc -l` + for ((i=1; i<=$lines; i++)) + do + echo "==============================================" + + memcg=`cat ${tmp_dir}/0.result | sed -n ${i}p |awk -F" " '{print $1}'` + echo ${memcg//)/\\\\} + + n=2 + for name in usage rss cache swap cache+rss+swap kmemusage memswusage + do + out="$name:" + for ((j=0; j<$times; j++)) + do + data=`cat ${tmp_dir}/${j}.result | grep -w $memcg |awk -F' ' -vx=$n '{print $x}'` + if [ $? -ne 0 ]; then + continue + fi + + if [ "X$data" != "X0" ]; then + data=$(($data/$unit)) + fi + + out="$out $data" + done + + echo $out + n=$(($n+1)) + done + + echo "" + done + + rm -rf $tmp_dir +} + +while getopts 'i:t:u:d:h' OPT; do + case $OPT in + "h") + usage + exit 0 + ;; + "i") + interval=$OPTARG + ;; + "t") + times=$OPTARG + ;; + "u") + if [ "X$OPTARG" == "XB" ]; then + unit=1 + elif [ "X$OPTARG" == "XKB" ]; then + unit=1024 + elif [ "X$OPTARG" == "XMB" ]; then + unit=$((1024*1024)) + elif [ "X$OPTARG" == "XG" ]; then + unit=$((1024*1024*1024)) + else + echo "Parameter error, -u $OPTARG, expect [B/KB/MB/G]" + exit 1 + fi + ;; + "d") + memcg_dir=$OPTARG + ;; + *) + usage + exit 0 + ;; + esac +done + +# collect data +rm -rf $tmp_dir; mkdir -p $tmp_dir +for ((i=0; i<$times; i++)) +do + memstat_all $memcg_dir ${tmp_dir}/${i}.result + + # add \ to change memcg from $memcg_dir/system\x2dpolicy to $memcg_dir/system)x2dpolicy + sed -e 's:\\:):g' -i ${tmp_dir}/${i}.result + + sleep $interval +done + +# output data +output diff 
--git a/source/tools/detect/cgtool/memcg_usage/Makefile b/source/tools/detect/cgtool/memcg_usage/Makefile new file mode 100644 index 00000000..3b8501db --- /dev/null +++ b/source/tools/detect/cgtool/memcg_usage/Makefile @@ -0,0 +1,9 @@ +newdirs += . +csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c)) +bpfsrcs += $(wildcard *.bpf.c) + +CFLAGS += -static + +target = memcg_usage + +include $(SRC)/mk/bpf.mk diff --git a/source/tools/detect/cgtool/memcg_usage/memcg_usage.bpf.c b/source/tools/detect/cgtool/memcg_usage/memcg_usage.bpf.c new file mode 100644 index 00000000..d16e3af3 --- /dev/null +++ b/source/tools/detect/cgtool/memcg_usage/memcg_usage.bpf.c @@ -0,0 +1,91 @@ +#include "../cgtoollib_bpf.h" +#include "../cgtool_comm.h" +#include "memcg_usage.h" + +struct bpf_map_def SEC("maps") usage_hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(unsigned long), + .value_size = sizeof(struct memcg_usage), + .max_entries = 256, +}; + +SEC("kprobe/mem_cgroup_try_charge") +int kprobe_mem_cgroup_try_charge(struct pt_regs *ctx) +{ + struct mm_struct___MEMCG *mm = (struct mm_struct___MEMCG *)PT_REGS_PARM2(ctx); + unsigned long pid_tgid = bpf_get_current_pid_tgid(); + struct memcg_usage usage = {0}; + struct memcg_usage *usage_up; + struct task_struct *tk; + + if (bpf_core_read(&tk, sizeof(struct task_struct *), &mm->owner)) + return 0; + + usage_up = bpf_map_lookup_elem(&usage_hash_map, &pid_tgid); + if (usage_up == NULL) { + usage.ptid = pid_tgid; + if (bpf_core_read(&usage.comm, sizeof(usage.comm), &tk->comm)) + return 0; + + bpf_map_update_elem(&usage_hash_map, &pid_tgid, &usage, BPF_ANY); + } else { + if (bpf_core_read(&usage_up->comm, sizeof(usage_up->comm), &tk->comm)) + return 0; + } + + return 0; +} + +SEC("kprobe/mem_cgroup_charge") +int kprobe_mem_cgroup_charge(struct pt_regs *ctx) +{ + struct mm_struct___MEMCG *mm = (struct mm_struct___MEMCG *)PT_REGS_PARM2(ctx); + unsigned long pid_tgid = bpf_get_current_pid_tgid(); + struct memcg_usage usage = 
{0}; + struct memcg_usage *usage_up; + struct task_struct *tk; + + if (bpf_core_read(&tk, sizeof(struct task_struct *), &mm->owner)) + return 0; + + usage_up = bpf_map_lookup_elem(&usage_hash_map, &pid_tgid); + if (usage_up == NULL) { + usage.ptid = pid_tgid; + if (bpf_core_read(&usage.comm, sizeof(usage.comm), &tk->comm)) + return 0; + + bpf_map_update_elem(&usage_hash_map, &pid_tgid, &usage, BPF_ANY); + } else { + if (bpf_core_read(&usage_up->comm, sizeof(usage_up->comm), &tk->comm)) + return 0; + } + + return 0; +} + +SEC("kprobe/try_charge") +int kprobe_try_charge(struct pt_regs *ctx) +{ + struct mem_cgroup *memcg = (struct mem_cgroup *)PT_REGS_PARM1(ctx); + unsigned long pid_tgid = bpf_get_current_pid_tgid(); + struct memcg_usage *usage_up; + struct cgroup_subsys_state css; + struct cgroup___MEMCG *cgrp; + + usage_up = bpf_map_lookup_elem(&usage_hash_map, &pid_tgid); + if (usage_up != NULL) { + if (bpf_core_read(&css, sizeof(struct cgroup_subsys_state), &memcg->css)) + return 0; + if (bpf_core_read(&cgrp, sizeof(struct cgroup___MEMCG *), &css.cgroup)) + return 0; + usage_up->knid = get_knid_by_cgroup(cgrp); + if (usage_up->knid == 0) + return 0; + + usage_up->pgsize += (unsigned int)PT_REGS_PARM3(ctx); + } + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/source/tools/detect/cgtool/memcg_usage/memcg_usage.c b/source/tools/detect/cgtool/memcg_usage/memcg_usage.c new file mode 100644 index 00000000..a2f111bb --- /dev/null +++ b/source/tools/detect/cgtool/memcg_usage/memcg_usage.c @@ -0,0 +1,321 @@ +#include +#include +#include +#include +#include +#include +#include +#include "memcg_usage.h" +#include "memcg_usage.skel.h" +#include "../cgtoollib.h" + +const char *argp_program_version = "cgtrace usage 1.0"; + +static const char argp_program_doc[] = + "\n Tracing memory usage for the memory cgroup\n" + ; + +static const struct argp_option usage_options[] = { + {"timeout", 't', "time", 0, "time out"}, + {"dir", 'f', "dir", 0, "cgroup dir"}, + 
{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"}, + {"debug", 'd', NULL, 0, "Enable libbpf debug output"}, + {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, + {}, +}; + +static struct env +{ + int timeout; + char *dir; + bool debug; + char *btf_custom_path; +} env = { + .debug = false, + .btf_custom_path = NULL, +}; + +static struct memcg_usage_bpf *obj = NULL; +static struct memcg_mess* mess = NULL; + +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (!env.debug) + return 0; + return vfprintf(stderr, format, args); +} + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + + switch (key) + { + case 't': + env.timeout = atoi(arg); + break; + case 'f': + env.dir = arg; + break; + case 'd': + env.debug = true; + break; + case 'b': + env.btf_custom_path = arg; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static int add_taskinfo(struct memcg_mess *m, struct memcg_usage usage) +{ + struct task_info *info_tmp = m->info; + struct task_info *p = (struct task_info *)malloc(sizeof(struct task_info)); + if (p == NULL) { + printf("malloc task info failed\n"); + return -ENOMEM; + } + + p->pid = usage.ptid >> 32; + p->tid = (unsigned int)usage.ptid; + strncpy(p->comm, usage.comm, sizeof(p->comm) - 1); + p->comm[sizeof(p->comm) - 1] = '\0'; + p->pgsize = usage.pgsize; + p->next = NULL; + + if (info_tmp == NULL) { + m->info = p; + } + else { + while (info_tmp->next != NULL) + info_tmp = info_tmp->next; + + info_tmp->next = p; + } + + m->task_num++; + + return 0; +} + +static struct memcg_mess* mess_list_find(int key) +{ + struct memcg_mess *mess_tmp = mess; + + while (mess_tmp != NULL) { + if (mess_tmp->knid == key) + return mess_tmp; + + mess_tmp = mess_tmp->next; + } + + return NULL; +} + +static int mess_list_insert(struct memcg_usage usage) +{ + int ret = 0; + struct memcg_mess *mess_tmp; + struct memcg_mess *m; + + m = (struct memcg_mess 
*)malloc(sizeof(struct memcg_mess)); + if (m == NULL) { + printf("malloc mess failed\n"); + return -ENOMEM; + } + + m->task_num = 0; + m->info = NULL; + m->knid = usage.knid; + m->next = NULL; + ret = add_taskinfo(m, usage); + if (ret < 0) + goto out_free; + + if (mess == NULL) + mess = m; + else { + mess_tmp = mess; + while (mess_tmp->next != NULL) + mess_tmp = mess_tmp->next; + + mess_tmp->next = m; + } + + return ret; + +out_free: + free(m); + return ret; +} + +void mess_list_free() +{ + struct memcg_mess *mess_tmp; + struct task_info *info_tmp; + + while (mess != NULL) { + /* free task info of mess */ + while (mess->info != NULL) { + info_tmp = mess->info; + mess->info = mess->info->next; + free(info_tmp); + } + + /* free mess */ + mess_tmp = mess; + mess = mess->next; + free(mess_tmp); + } +} + +static int usage_restore() +{ + int fd = bpf_map__fd(obj->maps.usage_hash_map); + unsigned long key, next_key; + struct memcg_usage usage; + struct memcg_mess *m; + int ret = 0; + + // calculate the number of tasks for each memcg + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(fd, &next_key, &usage); + + m = mess_list_find(usage.knid); + if (m == NULL) { + ret = mess_list_insert(usage); + if (ret < 0) + return ret; + } + else { + ret = add_taskinfo(m, usage); + if (ret < 0) + return ret; + } + + key = next_key; + } + + return ret; +} + +static void free_map(void) +{ + int fd = bpf_map__fd(obj->maps.usage_hash_map); + unsigned long key, next_key; + + while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { + bpf_map_delete_elem(fd, &next_key); + key = next_key; + } +} + +static void usage_show(unsigned int knid) +{ + struct memcg_mess *mess_tmp = mess; + struct task_info *info_tmp; + char dir[PATH_MAX]; + bool find = false; + + while (mess_tmp != NULL) { + if (knid == -1 || mess_tmp->knid == knid) + find = true; + else + goto next; + + if (get_dir_by_knid(mess_tmp->knid, "memory", dir, sizeof(dir)) < 0) { + printf("can't get cgroup dir 
by knid:%u\n", mess_tmp->knid); + goto next; + } + + printf("task number:%d cgroup dir:%s", mess_tmp->task_num, dir); + printf(" PID TID COMM PGSIZE\n"); + printf("-----------------------------------\n"); + + /* list task info of mess */ + info_tmp = mess_tmp->info; + while (info_tmp != NULL) { + printf("%-6u %-6u %-16s %u\n", info_tmp->pid, info_tmp->tid, info_tmp->comm, info_tmp->pgsize); + info_tmp = info_tmp->next; + } + printf("\n"); +next: + mess_tmp = mess_tmp->next; + } + + if (find == false) { + printf("can't get memory usage in %s. ", env.dir); + printf("check whether the [dir] parameter is correct.\n"); + } +} + +static void alarm_stop(int signo) +{ + unsigned int knid = -1; + + if (env.dir != NULL) + knid = get_knid_by_dir(env.dir); + + if (usage_restore() == 0) + usage_show(knid); + + free_map(); + mess_list_free(); +} + +int main(int argc, char **argv) +{ + int err; + struct bpf_program *prog; + static const struct argp argp = { + .options = usage_options, + .parser = parse_arg, + .doc = argp_program_doc, + .args_doc = NULL, + }; + + bump_memlock_rlimit(); + libbpf_set_print(libbpf_print_fn); + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + obj = memcg_usage_bpf__open(); + if (!obj) + { + printf("failed to open BPF object\n"); + return 1; + } + + bpf_object__for_each_program(prog, obj->obj) { + if (!find_ksym_by_name(bpf_program__name(prog))) + bpf_program__set_autoload(prog, false); + } + + err = memcg_usage_bpf__load(obj); + if (err) + { + printf("failed to load BPF object: %s\n", strerror(-err)); + goto cleanup; + } + err = memcg_usage_bpf__attach(obj); + if (err) + { + printf("failed to attach BPF programs: %s\n", strerror(-err)); + goto cleanup; + } + + if (env.timeout == 0) + env.timeout = -1; + + signal(SIGINT, alarm_stop); + signal(SIGALRM, alarm_stop); + alarm(env.timeout); + sleep(env.timeout + 1); + +cleanup: + memcg_usage_bpf__destroy(obj); + return 0; +} diff --git 
a/source/tools/detect/cgtool/memcg_usage/memcg_usage.h b/source/tools/detect/cgtool/memcg_usage/memcg_usage.h new file mode 100644 index 00000000..da1c74a6 --- /dev/null +++ b/source/tools/detect/cgtool/memcg_usage/memcg_usage.h @@ -0,0 +1,28 @@ +#ifndef __CGTRACE_H +#define __CGTRACE_H + +#define TASK_COMM_LEN 16 + +struct memcg_usage { + char comm[TASK_COMM_LEN]; + unsigned int pgsize; + int knid; + unsigned long ptid; +}; + +struct task_info { + unsigned int pid; + unsigned int tid; + char comm[TASK_COMM_LEN]; + unsigned int pgsize; + struct task_info *next; +}; + +struct memcg_mess { + int knid; + struct task_info *info; + unsigned int task_num; + struct memcg_mess *next; +}; + +#endif -- Gitee