diff --git a/rpm/sysak.service b/rpm/sysak.service index 4379ea4c05488085a735688e32f25a5e31a28bf3..447ecd207aabe57ae2ee30ebfcb09992de148048 100644 --- a/rpm/sysak.service +++ b/rpm/sysak.service @@ -7,7 +7,7 @@ Restart=always RestartSec=10 CPUQuota=30% - MemoryLimit=60M + MemoryLimit=300M ExecStart=/usr/local/sysak/.sysak_components/tools/dist/app/beeQ/run.sh ExecStop=kill -9 $(pidof unity-mon) ExecReload=kill -1 $(pidof unity-mon) diff --git a/source/lib/uapi/Makefile b/source/lib/uapi/Makefile index b92c5bdc3c5dd5cc0577a892fc990fdfd2f4a455..d267d7a7b02dce65cdaa523a78b12f6af03124d3 100644 --- a/source/lib/uapi/Makefile +++ b/source/lib/uapi/Makefile @@ -1,6 +1,7 @@ SOURCE := $(shell find . -name "*.c") OBJS :=$(patsubst %.c,%.o,$(SOURCE)) STATIC_OBJS := $(addprefix $(OBJPATH)/,$(OBJS)) +LIBS += -L /usr/lib64 -l:libelf.a libsysak: $(OBJ_LIB_PATH)/libsysak.a @@ -10,6 +11,6 @@ $(OBJ_LIB_PATH)/libsysak.a: $(STATIC_OBJS) $(STATIC_OBJS): $(OBJS) $(OBJS): %.o : %.c - gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include + gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include $(LIBS) diff --git a/source/lib/uapi/include/kcore_utils.h b/source/lib/uapi/include/kcore_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..a877ed9c9e7d3b5f347775c560ac53f68475cb2a --- /dev/null +++ b/source/lib/uapi/include/kcore_utils.h @@ -0,0 +1,70 @@ +#ifndef __KCORE_UTILS_H +#define __KCORE_UTILS_H /* include guard: must be the same name as tested by the #ifndef */ + +#include +#include +#include + +#define BUFF_MAX 4096 +#define MAX_KCORE_ELF_HEADER_SIZE 32768 + +#ifdef DEBUG +#define LOG_DEBUG(...) fprintf(stderr, __VA_ARGS__) +#else +#define LOG_DEBUG(...) do { } while (0) +#endif /* DEBUG */ + +#define LOG_INFO(...) fprintf(stdout, __VA_ARGS__) +#define LOG_WARN(...) fprintf(stderr, __VA_ARGS__) +#define LOG_ERROR(...) 
fprintf(stderr, __VA_ARGS__) + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +/* Cached copy of the /proc/kcore ELF header plus pointers to its program headers */ +struct proc_kcore_data { + unsigned int flags; + unsigned int segments; + char *elf_header; + size_t header_size; + Elf64_Phdr *load64; + Elf64_Phdr *notes64; + Elf32_Phdr *load32; + Elf32_Phdr *notes32; + void *vmcoreinfo; + unsigned int size_vmcoreinfo; +}; + + +/** + * lookup_kernel_symbol - look up kernel symbol address from /proc/kallsyms + * + * @symbol_name: kernel symbol name to look up. + * @return: the address of the kernel symbol, or (uintptr_t)-1 if it is not found or /proc/kallsyms cannot be opened. + * + */ +uintptr_t lookup_kernel_symbol(const char *symbol_name); + +/* prepare_btf_file - check whether the BTF file exists and try to download it if not; returns its path, or NULL if it is still missing */ +char *prepare_btf_file(); + +/* open /proc/kcore and read the ELF header data needed to interpret it; returns 0 on success, -1 on failure */ +int kcore_init(); + +/* close /proc/kcore and free the cached ELF header */ +void kcore_uninit(); + +/** + * kcore_readmem - read data at a given kernel virtual address from kcore + * + * @kvaddr: kernel address to read. + * @buf: buffer that receives the data read. + * @size: size of the data to read. + * @return: number of bytes read on success, -1 on failure. 
+ * + * Note: must call after kcore_init() + */ +ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size); + + +#endif \ No newline at end of file diff --git a/source/lib/uapi/kcore_utils.c b/source/lib/uapi/kcore_utils.c new file mode 100644 index 0000000000000000000000000000000000000000..e58b45c21151eae4508d706367b3036a5b02ed71 --- /dev/null +++ b/source/lib/uapi/kcore_utils.c @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kcore_utils.h" + +#define LEN (128) + +static struct proc_kcore_data proc_kcore_data = { 0 }; +static struct proc_kcore_data *pkd = &proc_kcore_data; + +static int kcore_fd = 0; + +/* + * Scan /proc/kallsyms for symbol_name; returns its address, or -1UL when not found + */ +uintptr_t lookup_kernel_symbol(const char *symbol_name) +{ + const char *kallsyms_file = "/proc/kallsyms"; + FILE *fp; + char line[BUFF_MAX]; + char *pos; + uintptr_t addr = -1UL; + + fp = fopen(kallsyms_file, "r"); + if (fp == NULL) { + perror("fopen: /proc/kallsyms"); + return -1; + } + + while (fgets(line, BUFF_MAX, fp)) { + if ((pos = strstr(line, symbol_name)) == NULL) + continue; + + /* Remove trailing newline */ + line[strcspn(line, "\n")] = '\0'; + + /* Exact match: the hit must follow whitespace and run to the end of the line */ + if (pos == line || !isspace(*(pos - 1))) + continue; + if (!strcmp(pos, symbol_name)) { + addr = strtoul(line, NULL, 16); + break; + } + } + + if (addr == -1UL) + LOG_ERROR("failed to lookup symbol: %s\n", symbol_name); + + fclose(fp); + return addr; +} + +static int kcore_elf_init() +{ /* reads the first MAX_KCORE_ELF_HEADER_SIZE bytes of /proc/kcore and caches the ELF header in pkd */ + Elf64_Ehdr *elf64; + Elf64_Phdr *load64; + Elf64_Phdr *notes64; + char eheader[MAX_KCORE_ELF_HEADER_SIZE]; + size_t load_size, notes_size; + + if (read(kcore_fd, eheader, MAX_KCORE_ELF_HEADER_SIZE) != + MAX_KCORE_ELF_HEADER_SIZE) { + perror("read: /proc/kcore ELF header"); + return -1; + } + /* the first program header is treated as the notes segment, the following ones as load segments */ + elf64 = (Elf64_Ehdr *)&eheader[0]; + notes64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr)]; + load64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr)]; + + pkd->segments = 
elf64->e_phnum - 1; + + notes_size = load_size = 0; + if (notes64->p_type == PT_NOTE) + notes_size = notes64->p_offset + notes64->p_filesz; + if (load64->p_type == PT_LOAD) /* check the load header itself, not the notes header again */ + load_size = (unsigned long)(load64+(elf64->e_phnum)) - + (unsigned long)elf64; + + pkd->header_size = MAX(notes_size, load_size); + if (!pkd->header_size || pkd->header_size > MAX_KCORE_ELF_HEADER_SIZE) /* eheader only holds MAX_KCORE_ELF_HEADER_SIZE bytes */ + pkd->header_size = MAX_KCORE_ELF_HEADER_SIZE; + + if ((pkd->elf_header = (char *)malloc(pkd->header_size)) == NULL) { + perror("malloc: /proc/kcore ELF header"); + return -1; + } + + memcpy(&pkd->elf_header[0], &eheader[0], pkd->header_size); + pkd->notes64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr)]; + pkd->load64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr)]; + + return 0; +} + +int kcore_init() +{ + if ((kcore_fd = open("/proc/kcore", O_RDONLY)) < 0) { + perror("open: /proc/kcore"); + return -1; + } + + if (kcore_elf_init()) + goto failed; + + return 0; + +failed: + close(kcore_fd); kcore_fd = -1; /* so kcore_uninit() does not close it a second time */ + return -1; +} + +void kcore_uninit(void) +{ + if (pkd->elf_header) + free(pkd->elf_header); + if (kcore_fd > 0) + close(kcore_fd); +} + +/* + * We may accidentally access invalid pfns on some kernels + * like 4.9, due to known bugs. Just skip it. 
+ */ +ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size) +{ + Elf64_Phdr *lp64; + unsigned long offset = -1UL; + ssize_t read_size; + unsigned int i; + + for (i = 0; i < pkd->segments; i++) { + lp64 = pkd->load64 + i; + if ((kvaddr >= lp64->p_vaddr) && + (kvaddr < (lp64->p_vaddr + lp64->p_memsz))) { + offset = (off_t)(kvaddr - lp64->p_vaddr) + + (off_t)lp64->p_offset; + break; + } + } + if (i == pkd->segments) { + for (i = 0; i < pkd->segments; i++) { + lp64 = pkd->load64 + i; + LOG_DEBUG("%2d: [0x%lx, 0x%lx)\n", i, lp64->p_vaddr, + lp64->p_vaddr + lp64->p_memsz); + } + //printf("invalid kvaddr 0x%lx\n", kvaddr); + goto failed; + } + + if (lseek(kcore_fd, offset, SEEK_SET) < 0) { + perror("lseek: /proc/kcore"); + goto failed; + } + + read_size = read(kcore_fd, buf, size); + if (read_size < size) { + perror("read: /proc/kcore"); + goto failed; + } + + return read_size; + +failed: + return -1; +} + +static void stripWhiteSpace(char *str) +{ + char tmp_str[strlen(str) + 1]; /* +1: room for the terminating NUL when nothing is stripped */ + int i, j = 0; + + for (i = 0; str[i] != '\0'; i++) { + if (str[i] != ' ' && str[i] != '\t' + && str[i] != '\n') { + tmp_str[j] = str[i]; + j++; + } + } + + tmp_str[j] = '\0'; + strcpy(str, tmp_str); + + return; +} + +static int do_cmd(const char *cmd, char *result, int len) +{ + FILE *res; + char region[LEN] = {0}; + char *str; + + res = popen(cmd, "r"); + if (res == NULL) { + printf("get region id failed\n"); + return -1; + } + + if (feof(res)) { + printf("cmd line end\n"); + return 0; + } + fgets(region, sizeof(region)-1, res); + str = region; + stripWhiteSpace(str); + /* skip \n */ + strncpy(result, str, len - 1); + result[len - 1] = '\0'; + pclose(res); + return 0; +} + +static int download_btf() +{ + char region[LEN] = {0}; + char arch[LEN] = {0}; + char kernel[LEN] = {0}; + char dw[LEN+LEN+LEN] = {0}; /* large enough for the LEN*3 bound the command is formatted with */ + char timeout[LEN] = "-internal"; + char sysak_path[LEN] = "/boot"; + char *curl_cmd = "curl -s --connect-timeout 2 http://100.100.100.200/latest/meta-data/region-id 2>&1"; + char 
*arch_cmd = "uname -m"; + char *kernel_cmd = "uname -r"; + char *tmp; + + do_cmd(curl_cmd, region, LEN); + if (!strstr(region,"cn-")) { + strcpy(region, "cn-hangzhou"); + memset(timeout, 0, sizeof(timeout)); + } + + do_cmd(arch_cmd, arch, LEN); + + do_cmd(kernel_cmd, kernel, LEN); + + if((tmp = getenv("SYSAK_WORK_PATH")) != NULL) + { + memset(sysak_path, 0, sizeof(sysak_path)); + /* SYSAK_WORK_PATH is external input: build the path with a bounded write */ + snprintf(sysak_path, sizeof(sysak_path), "%s/tools/%s", + tmp, kernel); + } + + if (snprintf(dw, sizeof(dw), "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s", sysak_path, kernel, &region[3],&region[3], timeout,arch, kernel) >= (int)sizeof(dw)) return -1; /* never run a truncated command */ + + do_cmd(dw, kernel, LEN); + return 0; +} + +static int check_btf_file(char *btf) +{ + struct stat fstat; + int ret = 0; + + ret = stat(btf, &fstat); + if (ret) + return -1; + if (fstat.st_size < 10*1024) + return -1; + + return 0; +} + +char *prepare_btf_file() +{ + static char btf[LEN] = {0}; + char ver[LEN] = {0}; + char *cmd = "uname -r"; + + do_cmd(cmd, ver, LEN); + + if (getenv("SYSAK_WORK_PATH") != NULL) + snprintf(btf, sizeof(btf), "%s/tools/%s/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver, ver); + else + snprintf(btf, sizeof(btf), "/boot/vmlinux-%s", ver); + + if (check_btf_file(btf)) { + download_btf(); + }; + + if (check_btf_file(btf)) { + LOG_ERROR("btf file:%s not found \n", btf); + return NULL; + } + + return btf; +} \ No newline at end of file diff --git a/source/mk/csrc.mk b/source/mk/csrc.mk index 8701dc77e9f58e4eaeca71a4824b2757a5260940..e3fdf57c3f9cb6a6063603c121518c2af76e2fc0 100644 --- a/source/mk/csrc.mk +++ b/source/mk/csrc.mk @@ -1,6 +1,6 @@ objs := $(foreach n, $(mods), $(OBJPATH)/$(n)) -CFLAGS += $(EXTRA_CFLAGS) -I$(SRC)/lib/uapi/include +CFLAGS += $(EXTRA_CFLAGS) $(INCLUDES) -I$(SRC)/lib/uapi/include LDFLAGS += $(EXTRA_LDFLAGS) ifeq ($(KERNEL_DEPEND), Y) diff --git a/source/tools/detect/mem/imc_latency/Makefile b/source/tools/detect/mem/imc_latency/Makefile new file mode 100644 index 
0000000000000000000000000000000000000000..282f8bfda3eed0884e233ad5b23a4f9db6cd8dd6 --- /dev/null +++ b/source/tools/detect/mem/imc_latency/Makefile @@ -0,0 +1,4 @@ +target := imc_latency +mods := imc_latency.o + +include $(SRC)/mk/csrc.mk diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md new file mode 100644 index 0000000000000000000000000000000000000000..90c46acd3eb97677185b6b2331ca9c6006e63cfa --- /dev/null +++ b/source/tools/detect/mem/imc_latency/README.md @@ -0,0 +1,82 @@ +# imc_latency + +基于PMU事件检测DDR内存访问延迟,用于检查微架构层级是否存在内存竞争。 + +## Usage + +### 使用用例 + +```bash +Sample: + +imc_latency -f /dev/stdout #输出日志到控制台 +imc_latency -d 15 -i 20 # 每15秒采集一次 输出20次采集结果 +``` + +### 结果说明 + +一次的采集结果如下,输出的指标类型有read_latency(rlat)和write_latency(wlat),指标的level有socket和channel两种级别。 + +- SOCKET_LEVEL: socket层级的读写内存延迟,通过对channel级的指标求平均得到。 +- CHANNEL_LEVEL:channel级别的读写内存延迟 + +```bash +[TIME-STAMP] 2023-07-10 07:06:17 +[SOCKET_LEVEL] + 0 1 + rlat 13.75 14.37 + wlat 39.37 37.49 +[CHANNEL_LEVEL]-[SOCKET-0] + 0 1 2 3 4 5 6 7 8 9 10 11 + rlat 14.37 13.75 0.00 13.75 13.75 0.00 13.12 13.75 0.00 14.37 13.75 0.00 + wlat 40.62 39.99 0.00 39.37 38.74 0.00 40.62 39.37 0.00 39.99 38.74 0.00 +[CHANNEL_LEVEL]-[SOCKET-1] + 0 1 2 3 4 5 6 7 8 9 10 11 + rlat 15.00 13.75 0.00 13.75 13.75 0.00 13.75 14.37 0.00 14.37 14.37 0.00 + wlat 38.12 37.49 0.00 36.87 36.87 0.00 38.12 38.12 0.00 38.12 37.49 0.00 +``` + +## 原理与限制 + +基于IMC的PMU组件实现,需要硬件支持。目前仅支持Intel的Ice Lake(ICX)、Sky Lake(SKX)、Cascade Lake以及Sapphire Rapids(SPR)等微架构。 + +| 微架构 | 代号 | cpu-model编号 | +| --------------- | ---- | ------------- | +| Sapphire Rapids | SPR | 143 | +| Ice Lake | ICX | 106/108 | +| Cascade Lake | | 85 | +| Sky Lake-X | SKX | 85 | + +### 检查是否支持 + +可以使用`lscpu`命令,通过CPU的`Model`字段查看微架构类型,检查硬件是否支持。 + +```bash +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +CPU(s): 128 +On-line CPU(s) list: 0-127 +Thread(s) per core: 2 +Core(s) per socket: 32 
+Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +BIOS Vendor ID: Intel(R) Corporation +CPU family: 6 +Model: 106 +Model name: Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz +BIOS Model name: Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz +Stepping: 6 +CPU MHz: 3500.000 +CPU max MHz: 3500.0000 +CPU min MHz: 800.0000 +BogoMIPS: 5800.00 +Virtualization: VT-x +L1d cache: 48K +L1i cache: 32K +L2 cache: 1280K +L3 cache: 49152K +NUMA node0 CPU(s): 0-31,64-95 +NUMA node1 CPU(s): 32-63,96-127 +``` diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c new file mode 100644 index 0000000000000000000000000000000000000000..e6f9340c2bbf028deb1ab824a076a00fa55381c9 --- /dev/null +++ b/source/tools/detect/mem/imc_latency/imc_latency.c @@ -0,0 +1,906 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "imc_latency.h" + +// #define DEBUG + +const char* argp_program_version = "imc_latency 0.1"; +const char argp_program_doc[] = + "Detect the memory latency based on IMC PMU.\n" + "\n" + + "USAGE: imc_latency [--help] [-d DELAY] [-i ITERATION] [-f LOGFILE]\n" + "\n" + + "EXAMPLES:\n" + " imc_latency # run forever, display the memory latency.\n" + " imc_latency -f foo.log # log to foo.log.\n"; + +static const struct argp_option opts[] = { + {"delay", 'd', "DELAY", 0, "Sample peroid, default is 3 seconds"}, + {"iter", 'i', "ITERATION", 0, "Output times, default run forever"}, + {"logfile", 'f', "LOGFILE", 0, + "Logfile for result, default /var/log/sysak/imc_latency/imc_latency.log"}, + {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, + {}, +}; + +struct Env { /* process-wide configuration and discovered hardware layout */ + uint32_t max_cpuid; /* EAX returned by CPUID leaf 0 */ + int32_t cpu_model; + int32_t cpu_family; + int32_t cpu_stepping; + bool vm; /* set when CPUID leaf 1 reports ECX bit 31 (hypervisor present) */ + int64_t nr_cpu; + int64_t nr_socket; + int64_t nr_core; + int64_t nr_channel; /* number of uncore_imc PMUs found in sysfs */ + int64_t* socket_ref_core; /* per socket: CPU id passed to perf_event_open for that socket's events */ + int64_t nr_iter; /* remaining passes of the main loop */ + int64_t delay; /* seconds slept between passes */ +} env = {.vm = 
false, .nr_iter = INT64_MAX, .delay = DEFAUlT_PEROID}; + +record before, after; +time_t before_ts = 0, after_ts = 0; +imc_pmu* pmus = 0; +char log_dir[FILE_PATH_LEN] = "/var/log/sysak/imc_latency"; +char default_log_path[FILE_PATH_LEN] = + "/var/log/sysak/imc_latency/imc_latency.log"; +char* log_path = 0; +FILE* log_fp = 0; +volatile sig_atomic_t exiting = 0; /* written from the SIGINT handler, polled by the main loop */ + +static void sigint_handler(int signo) { exiting = 1; } + +/* if out of range or no number found return nonzero */ +static int parse_long(const char* str, long* retval) { + int err = 0; + char* endptr; + errno = 0; + long val = strtol(str, &endptr, 10); + + /* Check for various possible errors */ + if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) || + (errno != 0 && val == 0)) { + fprintf(stderr, "Failed parse val.\n"); + err = errno; + return err; + } + + if (endptr == str) return err = -1; + *retval = val; + return err; +} + +static error_t parse_arg(int key, char* arg, struct argp_state* state) { + int err = 0; + long val; + switch (key) { + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case 'd': + err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse delay.\n"); + argp_usage(state); + } + + env.delay = val; + break; + case 'i': + err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse iteration-num.\n"); + argp_usage(state); + } + env.nr_iter = val; + env.nr_iter++; /* one extra pass: the first one only records the baseline */ + break; + case 'f': + log_path = arg; + break; + case ARGP_KEY_ARG: + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +static int prepare_directory(char* path) { + int ret; + + ret = mkdir(path, 0777); + if (ret < 0 && errno != EEXIST) + return errno; + else + return 0; +} + +static FILE* open_logfile() { + FILE* f = 0; + if (!log_path) { + log_path = default_log_path; + } + + f = fopen(log_path, "w"); + + return f; +} + +int64_t read_sys_file(char* path, bool slient) { + int64_t val; + FILE* fp = fopen(path, "r"); + if (!fp) { + if 
(!slient) fprintf(stderr, "Failed open sys-file: %s\n", path); + return -1; + } + + if (fscanf(fp, "%ld\n", &val) != 1) val = -1; /* unparsable content is reported like a missing file */ +#ifdef DEBUG + fprintf(stderr, "read from=%s val=%ld\n", path, val); +#endif + if (fp) fclose(fp); + return val; +} + +static int write_reg(imc_event* ev, uint64_t val) { + int err = 0; + if (ev->fd >= 0) { + close(ev->fd); + ev->fd = -1; + } + + ev->attr.config = ev->fixed ? 0xff : val; + + if ((ev->fd = syscall(SYS_perf_event_open, &ev->attr, -1, ev->core_id, -1, + 0)) < 0) { /* 0 is a valid descriptor; only negative means failure */ + fprintf(stderr, "Linux Perf: Error on programming PMU %d:%s\n", + ev->pmu_id, strerror(errno)); + fprintf(stderr, "config: 0x%llx config1: 0x%llx config2: 0x%llx\n", + ev->attr.config, ev->attr.config1, ev->attr.config2); + if (errno == EMFILE) fprintf(stderr, "%s", ULIMIT_RECOMMENDATION); + + return -1; + } + return err; +} + +static uint64_t read_reg(imc_event* ev) { + uint64_t result = 0; + if (ev->fd >= 0) { + int status = read(ev->fd, &result, sizeof(result)); + if (status != sizeof(result)) { + fprintf( + stderr, + "PCM Error: failed to read from Linux perf handle %d PMU %d\n", + ev->fd, ev->pmu_id); + } + } + return result; +} + +static bool is_cpu_online(int cpu_id) { + char path[BUF_SIZE]; + uint64_t val; + bool res = false; + + snprintf(path, BUF_SIZE, "/sys/devices/system/cpu/cpu%d/online", cpu_id); + + FILE* fp = fopen(path, "r"); + if (!fp) { + fprintf(stderr, "Failed open %s.\n", path); + goto cleanup; + } + + val = read_sys_file(path, true); + if (val == UINT64_MAX) { + goto cleanup; + } + + res = (val != 0); /* the file holds 0 for an offline CPU, so test the value, not just readability */ + +cleanup: + if (fp) fclose(fp); + return res; +} + +int64_t read_core_id(int cpu_id) { + char core_id_path[BUF_SIZE]; + int64_t val = -1; + + snprintf(core_id_path, BUF_SIZE, + "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu_id); + val = read_sys_file(core_id_path, true); + return val; +} + +int64_t read_physical_package_id(int cpu_id) { + char pkg_id_path[BUF_SIZE]; + + int64_t val = -1; + + snprintf(pkg_id_path, BUF_SIZE, + 
"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", + cpu_id); + val = read_sys_file(pkg_id_path, true); + + return val; +} + +static int get_topology(int id, struct topology_ent* ent) { + int err = 0; + ent->core_id = read_core_id(id); + ent->socket_id = read_physical_package_id(id); + if (ent->core_id == -1 || ent->socket_id == -1) { +#ifdef DEBUG + fprintf(stderr, "get coreid=%d socket_id=%d\n", ent->core_id, + ent->socket_id); +#endif + err = -1; + } + + return err; +} + +static int discovery_topology() { + int err = 0, i = 0; + struct topology_ent* topo = 0; + + env.nr_cpu = sysconf(_SC_NPROCESSORS_CONF); + + if (env.nr_cpu < 0) { + fprintf(stderr, "Failed get nr_cpu.\n"); + err = -1; + goto cleanup; + } + + topo = calloc(env.nr_cpu, sizeof(struct topology_ent)); + if (!topo) { + fprintf(stderr, "Faile calloc topology memory.\n"); + err = -1; + goto cleanup; + } + + int64_t max_skt_id = 0; + int64_t max_core_id = 0; + for (i = 0; i < env.nr_cpu; i++) { + err = get_topology(i, topo + i); + if (err) { + fprintf(stderr, "Failed get topology cpuid:%d\n", i); + goto cleanup; + } + + max_skt_id = + max_skt_id > topo[i].socket_id ? max_skt_id : topo[i].socket_id; + max_core_id = + max_core_id > topo[i].core_id ? max_core_id : topo[i].core_id; + } + + env.nr_socket = max_skt_id + 1; + env.nr_core = max_core_id + 1; + + env.socket_ref_core = calloc(env.nr_socket, sizeof(int64_t)); + if (!env.socket_ref_core) { + fprintf(stderr, "Failed calloc socket_ref_core. 
nr_socket=%ld\n", + (long)env.nr_socket); + err = -1; + goto cleanup; + } + + for (i = 0; i < env.nr_cpu; i++) { + if (!is_cpu_online(i)) continue; + env.socket_ref_core[topo[i].socket_id] = i; /* the last online CPU seen in a socket becomes its reference CPU */ + } + +cleanup: + if (topo) free(topo); + topo = 0; + return err; +} + +static void cpuid_1(int leaf, CPUID_INFO* info) { + __asm__ __volatile__("cpuid" + : "=a"(info->reg.eax), "=b"(info->reg.ebx), + "=c"(info->reg.ecx), "=d"(info->reg.edx) + : "a"(leaf)); +} + +void cpuid_2(const unsigned leaf, const unsigned subleaf, CPUID_INFO* info) { + __asm__ __volatile__("cpuid" + : "=a"(info->reg.eax), "=b"(info->reg.ebx), + "=c"(info->reg.ecx), "=d"(info->reg.edx) + : "a"(leaf), "c"(subleaf)); +} + +static bool detect_model() { + char buffer[1024]; + union { + char cbuf[16]; + int ibuf[16 / sizeof(int)]; + } buf; + + CPUID_INFO cpuinfo; + + bzero(buffer, 1024); + bzero(buf.cbuf, 16); + cpuid_1(0, &cpuinfo); + + buf.ibuf[0] = cpuinfo.array[1]; + buf.ibuf[1] = cpuinfo.array[3]; + buf.ibuf[2] = cpuinfo.array[2]; + + if (strncmp(buf.cbuf, "GenuineIntel", 4 * 3) != 0) { + fprintf(stderr, "Not intel cpu.\n"); + return false; + } + + env.max_cpuid = cpuinfo.array[0]; + + cpuid_1(1, &cpuinfo); + env.cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) | + ((cpuinfo.array[0] & 0xf00000) >> 16); + env.cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) | + ((cpuinfo.array[0] & 0xf0000) >> 12); /* EAX bits 19:16 become the high nibble, bits 7:4 the low nibble */ + env.cpu_stepping = cpuinfo.array[0] & 0x0f; + + if (cpuinfo.reg.ecx & (1UL << 31UL)) { + env.vm = true; + fprintf(stderr, + "WARN: Detected a hypervisor/virtualization technology. 
Some " + "metrics might not be available due to configuration or " + "availability of virtual hardware features.\n"); + } + + if (env.cpu_family != 6) { + fprintf(stderr, "Unsupport CPU Family: %d\n", env.cpu_family); + return false; + } + + return true; +} + +bool is_model_support() { /* folds variant model ids onto one canonical id in env.cpu_model, then accepts only ICX, SPR and SKX */ + switch (env.cpu_model) { + case NEHALEM: + env.cpu_model = NEHALEM_EP; + break; + case ATOM_2: + env.cpu_model = ATOM; + break; + case HASWELL_ULT: + case HASWELL_2: + env.cpu_model = HASWELL; + break; + case BROADWELL_XEON_E3: + env.cpu_model = BROADWELL; + break; + case ICX_D: + env.cpu_model = ICX; + break; + case CML_1: + env.cpu_model = CML; + break; + case ICL_1: + env.cpu_model = ICL; + break; + case TGL_1: + env.cpu_model = TGL; + break; + case ADL_1: + env.cpu_model = ADL; + break; + case RPL_1: + case RPL_2: + case RPL_3: + env.cpu_model = RPL; + break; + } + + return (env.cpu_model == ICX || env.cpu_model == SPR || + env.cpu_model == SKX); +} + +uint32_t* get_ddr_latency_metric_config() { /* returns a calloc'ed array of 4 event configs (occupancy/insert pairs for RPQ then WPQ), or NULL on allocation failure */ + uint32_t* cfgs = 0; + cfgs = calloc(4, sizeof(uint32_t)); + if (!cfgs) { + fprintf(stderr, "Failed calloc cfgs memory.\n"); + return NULL; + } + + if (ICX == env.cpu_model || SPR == env.cpu_model) { + cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy pch 0 + cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert.pch 0 + cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x82) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy pch 0 + cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM WPQ Insert.pch 0 + } else { + cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy + cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ Insert + cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy + cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ 
Insert + } + + return cfgs; +} + +struct perf_event_attr init_perf_event_attr(bool group) { /* returns a zeroed attr; type and config hold -1 placeholders */ + struct perf_event_attr e; + bzero(&e, sizeof(struct perf_event_attr)); + e.type = -1; // must be set up later + e.size = sizeof(e); + e.config = -1; // must be set up later + e.read_format = group ? PERF_FORMAT_GROUP + : 0; /* PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_GROUP ; */ + return e; +} + +void init_imc_event(imc_event* event, int pmu_id, int core_id, bool fixed) { /* fills *event for the given PMU type and CPU; fd starts at -1 (not opened yet) */ + struct perf_event_attr attr = init_perf_event_attr(false); + attr.type = pmu_id; + event->attr = attr; + event->fixed = fixed; + event->pmu_id = pmu_id; + event->core_id = core_id; + event->fd = -1; +} + +void init_imc_reggrp(imc_reg_group* grp, int socket_id, int pmu_id) { /* one fixed event plus GENERAL_REG_NUM general events, all bound to the socket's reference CPU */ + int i = 0; +#ifdef DEBUG + + fprintf(stderr, "Init imc reg group: socketid=%d pmuid=%d\n", socket_id, + pmu_id); +#endif + init_imc_event(&grp->fixed_ev, pmu_id, env.socket_ref_core[socket_id], + true); + + for (i = 0; i < GENERAL_REG_NUM; i++) { + init_imc_event(&grp->general_ev[i], pmu_id, + env.socket_ref_core[socket_id], false); + } +} + +imc_pmu* init_imc_pmus(int64_t* pmu_ids, int64_t size) { /* builds one imc_pmu per socket, each with `size` register groups; NOTE(review): calloc results are not checked here */ + int skt_id = 0; + int pmu_id = 0; + + imc_pmu* pmus = calloc(env.nr_socket, sizeof(imc_pmu)); + + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + pmus[skt_id].reg_groups = calloc(size, sizeof(imc_reg_group)); + pmus[skt_id].socket_id = skt_id; + pmus[skt_id].nr_grp = size; + + for (pmu_id = 0; pmu_id < size; pmu_id++) { + init_imc_reggrp(&pmus[skt_id].reg_groups[pmu_id], skt_id, + pmu_ids[pmu_id]); + } + } + + return pmus; +} + +void program_imc(uint32_t* cfgs, imc_pmu* pmus) { + int skt_id = 0; + int pmu_id = 0; + int idx = 0; + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + imc_pmu* pmu = pmus + skt_id; + for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { + imc_reg_group* grp = pmu->reg_groups + pmu_id; + /* enable and reset fixed counter (DRAM clock) */ + 
write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN); + write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN + + MC_CH_PCI_PMON_FIXED_CTL_RST); + for (idx = 0; idx < GENERAL_REG_NUM; idx++) { + uint64_t event = cfgs[idx]; + if (SPR == env.cpu_model) { + write_reg(&grp->general_ev[idx], event); + } else { + write_reg(&grp->general_ev[idx], MC_CH_PCI_PMON_CTL_EN); + write_reg(&grp->general_ev[idx], + MC_CH_PCI_PMON_CTL_EN | event); + } + } + } + } +} + +socket_record* alloc_socket_record() { + int skt_id = 0; + socket_record* rec = calloc(env.nr_socket, sizeof(socket_record)); + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + rec[skt_id].channel_record_arr = + calloc(env.nr_channel, sizeof(channel_record)); + } + return rec; +} + +void free_socket_record(socket_record* rec) { + int skt_id = 0; + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + free(rec[skt_id].channel_record_arr); + } + free(rec); +} + +void init_data() { + before.socket_record_arr = alloc_socket_record(); + after.socket_record_arr = alloc_socket_record(); +} + +void free_data() { + free_socket_record(before.socket_record_arr); + free_socket_record(after.socket_record_arr); +} + +int64_t get_perf_pmuid(int num) { + int64_t id = -1; + char imc_path[BUF_SIZE]; + + if (num != -1) { + snprintf(imc_path, BUF_SIZE, + "/sys/bus/event_source/devices/uncore_imc_%d/type", num); + } else { + snprintf(imc_path, BUF_SIZE, + "/sys/bus/event_source/devices/uncore_imc/type"); + } + + id = read_sys_file(imc_path, true); + + return id; +} + +static int64_t* enumerate_imc_PMUs() { + int64_t* pmu_ids = 0; + int idx = 0, i = 0; + + pmu_ids = calloc(MAX_IMC_ID + 2, sizeof(int64_t)); /* the loop below probes ids -1..MAX_IMC_ID inclusive, i.e. MAX_IMC_ID + 2 candidates */ + + if (!pmu_ids) { + fprintf(stderr, "Failed calloc pmu ids memory.\n"); + return NULL; + } + + for (i = -1; i <= MAX_IMC_ID; ++i) { + int64_t pmu_id = get_perf_pmuid(i); + if (pmu_id != -1) pmu_ids[idx++] = pmu_id; + } + + env.nr_channel = idx; + +cleanup: + + if (env.nr_channel == 0 && pmu_ids) { + free(pmu_ids); + pmu_ids = 0; 
+ } + + return pmu_ids; +} + +static int init_env() { + int err = 0; + int64_t* pmu_ids = 0; + uint32_t* cfgs = 0; + + // check model + if (!detect_model()) { + fprintf(stderr, "Failed detect model.\n"); + err = -1; + goto cleanup; + } + + if (!is_model_support()) { + fprintf(stderr, "Unsupport model.\n"); + err = -1; + goto cleanup; + } + + // get core/socket info + err = discovery_topology(); + if (err) { + fprintf(stderr, "Failed discovery topology.\n"); + err = -1; + goto cleanup; + } + + // get all imc-pmu id + pmu_ids = enumerate_imc_PMUs(); + if (!pmu_ids) { + fprintf(stderr, "Failed enumerate imc pmus.\n"); + err = -1; + goto cleanup; + } + + cfgs = get_ddr_latency_metric_config(); + if (!cfgs) { + fprintf(stderr, "Failed get ddr latency metric config.\n"); + err = -1; + goto cleanup; + } + + // init pmu + pmus = init_imc_pmus(pmu_ids, env.nr_channel); + + // write pmu register + program_imc(cfgs, pmus); + + // init data + init_data(); +#ifdef DEBUG + fprintf(stderr, "nr_socket=%ld nr_core=%ld nr_cpu=%ld nr_channel=%ld \n", + (long)env.nr_socket, (long)env.nr_core, (long)env.nr_cpu, (long)env.nr_channel); + int i = 0; + for (i = 0; i < env.nr_socket; i++) { + fprintf(stderr, "socket%d-ref cpu=%ld\n", i, (long)env.socket_ref_core[i]); + } +#endif + +cleanup: + + if (pmu_ids) { + free(pmu_ids); + pmu_ids = 0; + } + + if (cfgs) { + free(cfgs); + cfgs = 0; + } + + return err; +} + +void read_imc() { + int skt_id = 0, pmu_id = 0, counter_id = 0; + after_ts = time(0); + + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + imc_pmu* pmu = pmus + skt_id; + socket_record* socket_ev = &after.socket_record_arr[skt_id]; + for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { + imc_reg_group* grp = pmu->reg_groups + pmu_id; + channel_record* channel_ev = + &after.socket_record_arr[skt_id].channel_record_arr[pmu_id]; + /* the fixed counter (DRAM clock) is read once per socket, from its first PMU */ + if (pmu_id == 0) { + socket_ev->dram_clock = read_reg(&grp->fixed_ev); + if (env.cpu_model == ICX || env.cpu_model == SNOWRIDGE) { + 
socket_ev->dram_clock = 2 * socket_ev->dram_clock; + } + } + + channel_ev->rpq_occ = read_reg(&grp->general_ev[RPQ_OCC]); + channel_ev->rpq_ins = read_reg(&grp->general_ev[RPQ_INS]); + channel_ev->wpq_occ = read_reg(&grp->general_ev[WPQ_OCC]); + channel_ev->wpq_ins = read_reg(&grp->general_ev[WPQ_INS]); + + socket_ev->rpq_occ += channel_ev->rpq_occ; + socket_ev->rpq_ins += channel_ev->rpq_ins; + socket_ev->wpq_occ += channel_ev->wpq_occ; + socket_ev->wpq_ins += channel_ev->wpq_ins; + } + } + + if (before_ts) { + double delta = after_ts - before_ts; + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + socket_record* before_socket_ev = &before.socket_record_arr[skt_id]; + socket_record* after_socket_ev = &after.socket_record_arr[skt_id]; + imc_pmu* pmu = pmus + skt_id; + double dram_speed = + (after_socket_ev->dram_clock - before_socket_ev->dram_clock) / + (delta * (double)1e9); + + for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { + channel_record* before_channel_ev = + &before_socket_ev->channel_record_arr[pmu_id]; + channel_record* after_channel_ev = + &after_socket_ev->channel_record_arr[pmu_id]; + /* (double) on each numerator: keeps occupancy/inserts from being truncated if the counters are integer-typed */ + if (after_channel_ev->rpq_ins - before_channel_ev->rpq_ins > + 0) { + after_channel_ev->read_latency = + (double)(after_channel_ev->rpq_occ - + before_channel_ev->rpq_occ) / + (after_channel_ev->rpq_ins - + before_channel_ev->rpq_ins) / + dram_speed; + } + + if (after_channel_ev->wpq_ins - before_channel_ev->wpq_ins > + 0) { + after_channel_ev->write_latency = + (double)(after_channel_ev->wpq_occ - + before_channel_ev->wpq_occ) / + (after_channel_ev->wpq_ins - + before_channel_ev->wpq_ins) / + dram_speed; + } + } + + if (after_socket_ev->rpq_ins - before_socket_ev->rpq_ins > 0) { + after_socket_ev->read_latency = + (double)(after_socket_ev->rpq_occ - before_socket_ev->rpq_occ) / + (after_socket_ev->rpq_ins - before_socket_ev->rpq_ins) / + dram_speed; + } + + if (after_socket_ev->wpq_ins - before_socket_ev->wpq_ins > 0) { + after_socket_ev->write_latency = + (double)(after_socket_ev->wpq_occ - 
before_socket_ev->wpq_occ) / + (after_socket_ev->wpq_ins - before_socket_ev->wpq_ins) / + dram_speed; + } + } + } +} + +static char* ts2str(time_t ts, char* buf, int size) { /* formats ts via gmtime() (UTC) as "YYYY-MM-DD HH:MM:SS" into buf and returns buf */ + struct tm* t = gmtime(&ts); + strftime(buf, size, "%Y-%m-%d %H:%M:%S", t); + return buf; +} + +static void output_ts(FILE* dest) { /* writes the "[TIME-STAMP]" line using the current time */ + char stime_str[BUF_SIZE] = {0}; + time_t now = time(0); + fprintf(dest, "[TIME-STAMP] %s\n", ts2str(now, stime_str, BUF_SIZE)); +} + +static void output_socket_lat(FILE* dest) { /* prints one column per socket: header row, then rlat and wlat rows from the `after` records */ + int32_t socket_id = 0; + + fprintf(dest, "%s\n", "[SOCKET_LEVEL]"); + // fprintf(dest, "%8s%16s%16s\n", "socket", "rlat", "wlat"); + fprintf(dest, "%8s", ""); + + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + fprintf(dest, "%8d", socket_id); + } + fprintf(dest, "\n"); + + fprintf(dest, "%8s", "rlat"); + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + socket_record* srec = &after.socket_record_arr[socket_id]; + fprintf(dest, "%8.2lf", srec->read_latency); + } + fprintf(dest, "\n"); + + fprintf(dest, "%8s", "wlat"); + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + socket_record* srec = &after.socket_record_arr[socket_id]; + fprintf(dest, "%8.2lf", srec->write_latency); + } + fprintf(dest, "\n"); +} + +static void output_channel_lat(FILE* dest) { /* per socket: one column per channel, header row then rlat and wlat rows */ + int32_t socket_id = 0, channel_id = 0; + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + char socket_name[32]; + snprintf(socket_name, 32, "%d", socket_id); + + socket_record* srec = &after.socket_record_arr[socket_id]; + + fprintf(dest, "[CHANNEL_LEVEL]-[SOCKET-%d]\n", socket_id); + fprintf(dest, "%8s", ""); + for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { + fprintf(dest, "%8d", channel_id); + } + fprintf(dest, "\n"); + + fprintf(dest, "%8s", "rlat"); + for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { + channel_record* crec = &srec->channel_record_arr[channel_id]; + fprintf(dest, "%8.2lf", crec->read_latency); + } + fprintf(dest, "\n"); + + 
fprintf(dest, "%8s", "wlat"); + for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { + channel_record* crec = &srec->channel_record_arr[channel_id]; + fprintf(dest, "%8.2lf", crec->write_latency); + } + fprintf(dest, "\n"); + } +} + +void swap_record() { + /* swap data */ + socket_record* tmp = before.socket_record_arr; + before.socket_record_arr = after.socket_record_arr; + after.socket_record_arr = tmp; + + /* clear after data */ + free_socket_record(after.socket_record_arr); + after.socket_record_arr = alloc_socket_record(); + + /* reset before timestamp */ + before_ts = after_ts; +} + +static void output_split(FILE* dest) { fprintf(dest, "\n"); } +static void collect_data() { + int32_t socket_id = 0, channel_id = 0, line_num = 0; + read_imc(); + + if (before_ts) { + output_ts(log_fp); + output_socket_lat(log_fp); + output_channel_lat(log_fp); + output_split(log_fp); + fflush(log_fp); + } + + swap_record(); +} + +static void clean_env(void) { free_data(); } + +int main(int argc, char** argv) { + int err; + /* parse args */ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + + err = argp_parse(&argp, argc, argv, 0, 0, 0); + if (err) { + fprintf(stderr, "Failed parse args.\n"); + return -1; + } + + prepare_directory(log_dir); + log_fp = open_logfile(); + if (!log_fp) { + fprintf(stderr, "Failed open log file.\n"); + return -1; + } + + if (signal(SIGINT, sigint_handler) == SIG_ERR) { + fprintf(stderr, "Failed set signal handler.\n"); + return -errno; + } + + err = init_env(); + if (err) { + fprintf(stderr, "Init env error.\n"); + return -1; + } + + while (env.nr_iter-- && !exiting) { + collect_data(); + sleep(env.delay); + } + + clean_env(); +} diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h new file mode 100644 index 0000000000000000000000000000000000000000..d2b6387ebaa8c50a07487878519b62d9856c6159 --- /dev/null +++ 
b/source/tools/detect/mem/imc_latency/imc_latency.h @@ -0,0 +1,156 @@ +#ifndef IMC_LATENCY_H +#define IMC_LATENCY_H + +#include +#include +#include +#include + +#define ULIMIT_RECOMMENDATION \ + ("try executing 'ulimit -n 1000000' to increase the limit on the number " \ + "of open files.\n") + +typedef union CPUID_INFO { + int array[4]; + struct { + unsigned int eax, ebx, ecx, edx; + } reg; +} CPUID_INFO; + +enum INTEL_CPU_MODEL { + NEHALEM_EP = 26, + NEHALEM = 30, + ATOM = 28, + ATOM_2 = 53, + CENTERTON = 54, + BAYTRAIL = 55, + AVOTON = 77, + CHERRYTRAIL = 76, + APOLLO_LAKE = 92, + GEMINI_LAKE = 122, + DENVERTON = 95, + SNOWRIDGE = 134, + CLARKDALE = 37, + WESTMERE_EP = 44, + NEHALEM_EX = 46, + WESTMERE_EX = 47, + SANDY_BRIDGE = 42, + JAKETOWN = 45, + IVY_BRIDGE = 58, + HASWELL = 60, + HASWELL_ULT = 69, + HASWELL_2 = 70, + IVYTOWN = 62, + HASWELLX = 63, + BROADWELL = 61, + BROADWELL_XEON_E3 = 71, + BDX_DE = 86, + SKL_UY = 78, + KBL = 158, + KBL_1 = 142, + CML = 166, + CML_1 = 165, + ICL = 126, + ICL_1 = 125, + RKL = 167, + TGL = 140, + TGL_1 = 141, + ADL = 151, + ADL_1 = 154, + RPL = 0xb7, + RPL_1 = 0xba, + RPL_2 = 0xbf, + RPL_3 = 0xbe, + BDX = 79, + KNL = 87, + SKL = 94, + SKX = 85, + ICX_D = 108, + ICX = 106, + SPR = 143, + END_OF_MODEL_LIST = 0x0ffff +}; + +#define MC_CH_PCI_PMON_CTL_EVENT(x) (x << 0) +#define MC_CH_PCI_PMON_CTL_UMASK(x) (x << 8) +#define MC_CH_PCI_PMON_CTL_RST (1 << 17) +#define MC_CH_PCI_PMON_CTL_EDGE_DET (1 << 18) +#define MC_CH_PCI_PMON_CTL_EN (1 << 22) +#define MC_CH_PCI_PMON_CTL_INVERT (1 << 23) +#define MC_CH_PCI_PMON_CTL_THRESH(x) (x << 24UL) +#define MC_CH_PCI_PMON_FIXED_CTL_RST (1 << 19) +#define MC_CH_PCI_PMON_FIXED_CTL_EN (1 << 22) +#define UNC_PMON_UNIT_CTL_FRZ_EN (1 << 16) +#define UNC_PMON_UNIT_CTL_RSV ((1 << 16) + (1 << 17)) + +#define RPQ_OCC 0 +#define RPQ_INS 1 +#define WPQ_OCC 2 +#define WPQ_INS 3 + +#define BUF_SIZE 1024 +#define MAX_IMC_ID 100 +#define GENERAL_REG_NUM 4 +#define FIXED_REG_NUM 1 +#define FILE_PATH_LEN 256 
+#define DEFAUlT_PEROID 3 + +typedef struct imc_event_t { + struct perf_event_attr attr; + int fd; + int core_id; + int pmu_id; + bool fixed; +} imc_event; + +typedef struct imc_reg_group_t { + imc_event general_ev[GENERAL_REG_NUM]; + imc_event fixed_ev; + int pmu_id; +} imc_reg_group; + +typedef struct imc_pmu_t { + imc_reg_group* reg_groups; + int socket_id; + int nr_grp; +} imc_pmu; + +struct topology_ent { + int64_t cpu_id; + int64_t core_id; + int64_t socket_id; +}; + +typedef struct event { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + uint64_t dram_speed; +} event; + +typedef struct channel_record { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + double read_latency; + double write_latency; +} channel_record; + +typedef struct socket_record { + channel_record* channel_record_arr; + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + double read_latency; + double write_latency; + uint64_t dram_clock; +} socket_record; + +typedef struct record { + socket_record* socket_record_arr; +} record; + +#endif diff --git a/source/tools/detect/mem/memcgoffline/Makefile b/source/tools/detect/mem/memcgoffline/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..de3835b51a9f458b13b6ae9d280a8d44b474eb3f --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/Makefile @@ -0,0 +1,7 @@ +target := memcgoffline +LIBS += -L ${OBJ_LIB_PATH}/lib -l:libcoolbpf.a -l:libsysak.a -lelf -lz +INCLUDES += -I$(SRC)/tools/detect/mem/memcgoffline/include +LDFLAGS += -Wall $(LIBS) +mods := memcg_iter.o memcgoffline.o + +include $(SRC)/mk/csrc.mk \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/include/btfparse.h b/source/tools/detect/mem/memcgoffline/include/btfparse.h new file mode 100644 index 0000000000000000000000000000000000000000..84204f540b000444299df0947cec670c0d896889 --- /dev/null +++ 
b/source/tools/detect/mem/memcgoffline/include/btfparse.h @@ -0,0 +1,39 @@ + + +#ifndef __BTF_PARSE_H +#define __BTF_PARSE_H + + + +/** + * btf_load: load btf from btf_custom_path + * + * @btf_custom_path: path of btf file + */ +struct btf *btf_load(char *btf_custom_path); +typedef unsigned int uint32_t; + +struct member_attribute +{ + uint32_t size; // size of structure's member + uint32_t real_size; // real_size mean + uint32_t offset; // offset of member in strucutre +}; + +/** + * btf_find_struct_member - find struct btfid by structure's name + * + * @btf: + * @struct_name: name of struct + * @member_name: name of structure's member + * @return: NULL mean error, get error number from errno. + * + * Note: Remember to free pointer of struct member_attribute + */ +struct member_attribute *btf_find_struct_member(struct btf *btf, char *struct_name, char *member_name); + +int btf_get_member_offset(struct btf *btf, char *name, char *member_name); +void btf__free(struct btf *btf); + +#endif + diff --git a/source/tools/detect/mem/memcgoffline/include/memcg_iter.h b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h new file mode 100644 index 0000000000000000000000000000000000000000..300a82b05d1281d70f154e6f407fe2388253acd6 --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h @@ -0,0 +1,36 @@ +#ifndef __MEMCG_ITER_H_ +#define __MEMCG_ITER_H_ + +#include "btfparse.h" + +#define PATH_MAX (2048) +#define LEN (255) +#define CSS_DYING (1 << 4) /* css is dying */ + +/* iterator function of "for_each_mem_cgroup" */ +unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev, + struct btf* handle); + +/* find out and set root_mem_cgroup from kallsyms*/ +int memcg_iter_init(); + +/* Iter all memory cgroups, must call after memcg_iter_init() */ +#define for_each_mem_cgroup(iter, start, btf) \ + for (iter = _mem_cgroup_iter(start, (unsigned long)NULL, btf); \ + iter != (unsigned long)NULL; \ + iter = _mem_cgroup_iter(start, iter, btf)) + 
+/* + * get member offset of certain struct, need to read from btf file, + * (don't call it in loop which may cause huge overhead) + */ +struct member_attribute *get_offset_no_cache(char *struct_name, + char *member_name, struct btf *handle); + +int get_member_offset(char *struct_name, char *member_name, + struct btf *handle); + +void memcg_get_name(unsigned long memcg, char *name, + int len, struct btf *btf_handle); + +#endif \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/memcg_iter.c b/source/tools/detect/mem/memcgoffline/memcg_iter.c new file mode 100644 index 0000000000000000000000000000000000000000..541d0367ffd805d37336b440cc367a4f1f57d582 --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/memcg_iter.c @@ -0,0 +1,291 @@ +#include +#include +#include +#include +#include +#include + +#include "memcg_iter.h" +#include "kcore_utils.h" + +static unsigned long root_mem_cgroup; + +struct member_attribute *get_offset_no_cache(char *struct_name, + char *member_name, struct btf *handle) +{ + struct member_attribute *att; + + att = btf_find_struct_member(handle, struct_name, member_name); + if (!att) { + return NULL; + } + + att->offset = att->offset/8; + + return att; +} + +int get_member_offset(char *struct_name, char *member_name, struct btf *handle) +{ + char prefix[LEN] = "struct "; + + strcat(prefix, struct_name); + + return btf_get_member_offset(handle, prefix, member_name)/8; +} + +static unsigned long _css_next_child(unsigned long pos, unsigned long parent, + struct btf *btf_handle) +{ + struct member_attribute *att, *att2; + unsigned long next; + + att = get_offset_no_cache("cgroup_subsys_state", "sibling", btf_handle); + if (!att) + return 0; + + att2 = get_offset_no_cache("cgroup_subsys_state", "children", btf_handle); + if (!att2) + return 0; + + if(!pos) { + kcore_readmem(parent + att2->offset, &next, sizeof(next)); + next = next - att->offset; + } else { + kcore_readmem(pos + att->offset, &next, sizeof(next)); + next = 
next - att->offset; + } + + if(next + att->offset != parent + att2->offset) + return next; + + return 0; +} + +unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev, + struct btf *btf_handle) +{ + struct member_attribute *att, *att2; + unsigned long css, root_css; + unsigned long memcg = 0; + unsigned long pos = 0; + unsigned long next = 0; + unsigned long tmp1, tmp2; + + if(!root) + root = root_mem_cgroup; + if(!prev) + return root; + + //printf("root:%lx, prev:%lx\n", root, prev); + + att = get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!att) + return 0; + + att2 = get_offset_no_cache("cgroup_subsys_state", "parent", btf_handle); + if (!att2) + return 0; + + pos = prev; + //kcore_readmem(pos + att->offset, &css, sizeof(css)); + css = pos + att->offset; + + //kcore_readmem(root+att->offset, &root_css, sizeof(root_css)); + root_css = root + att->offset; + next = _css_next_child(0, css, btf_handle); + if(!next) + { + tmp1 = css; + while(tmp1 != root_css) + { + kcore_readmem(tmp1 + att2->offset, &tmp2, sizeof(tmp2)); + next = _css_next_child(tmp1, tmp2, btf_handle); + if(next) + break; + tmp1 = tmp2; + } + } + + if(!next) + return 0; + + memcg = next - att->offset; + + return memcg; +} + +int memcg_iter_init() +{ + unsigned long tmp; + size_t size; + + tmp = lookup_kernel_symbol("root_mem_cgroup"); + if (tmp == (unsigned long )-1) { + LOG_ERROR("unable to get root_mem_cgroup\n"); + return -1; + } else { + size = kcore_readmem(tmp, &root_mem_cgroup, 8); + if (size < 8) { + LOG_ERROR("get incorrect address where root_mem_cgroup point to\n"); + return -1; + } + } + + return 0; +} + +static int prepend(char **buffer, int *buflen, const char *str, + int namelen, int off) +{ + *buflen -= namelen + off; + if (*buflen < 0) + return -1; + *buffer -= namelen + off; + if (off) + **buffer = '/'; + memcpy(*buffer + off, str, namelen); + return 0; +} + +static int cgroup_path(unsigned long cgrp, char *buf, + int buflen, struct btf *btf_handle) +{ + int 
ret = -1; + char *start; + unsigned long cgp; + char tmpname[PATH_MAX]; + struct member_attribute *cg_pa_att, *cg_name_att; + struct member_attribute *cgn_name_attr; + + cg_pa_att = get_offset_no_cache("cgroup", "parent", btf_handle); + if (!cg_pa_att) + return -1; + + cg_name_att = get_offset_no_cache("cgroup", "name", btf_handle); + if (!cg_name_att) + return -1; + + cgn_name_attr = get_offset_no_cache("cgroup_name", "name", btf_handle); + if (!cgn_name_attr) + return -1; + + + kcore_readmem(cgrp + cg_pa_att->offset, &cgp, sizeof(cgp)); + if (!cgp) { + if (strncpy(buf, "/", buflen) == NULL) + return -1; + return 0; + } + + start = buf + buflen - 1; + *start = '\0'; + + do { + int len; + unsigned long name; + + kcore_readmem(cgrp + cg_name_att->offset, &name, sizeof(name)); + + name += cgn_name_attr->offset; + kcore_readmem(name, tmpname,sizeof(tmpname)); + + len = strlen(tmpname); + if ((start -= len) < buf) + goto out; + + memcpy(start, tmpname, len); + + if (--start < buf) + goto out; + + *start = '/'; + cgrp = cgp; + + kcore_readmem(cgp + cg_pa_att->offset, &cgp, sizeof(cgp)); + + } while (cgp); + + ret = 0; + memmove(buf, start, buf + buflen - start); +out: + return ret; +} + +void memcg_get_name(unsigned long memcg, char *name, + int len, struct btf *btf_handle) +{ + char *end; + int pos; + unsigned long cg, knname; + char subname[257]; + struct member_attribute *att; + + memset(subname, 0, sizeof(subname)); + att = get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!att) + return; + + cg = memcg + att->offset; + + att = get_offset_no_cache("cgroup_subsys_state", "cgroup", btf_handle); + if (!att) + return; + + kcore_readmem(cg + att->offset, &cg, sizeof(cg)); + +#ifdef LINUX_310 + if (!cg) + return; + cgroup_path(cg, name, PATH_MAX); + end = name+strlen("/sys/fs/cgroup/memory/"); + memmove(end, name, strlen(name)+1); + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); +#else + unsigned long kn; + unsigned long pkn; + 
int kn_name_offset, kn_pa_offset; + + att = get_offset_no_cache("cgroup", "kn", btf_handle); + if (!att) + return; + + kcore_readmem(cg + att->offset, &kn, sizeof(kn)); + + if (!cg || !kn) + return; + + end = name + len - 1; + prepend(&end, &len, "\0", 1, 0); + pkn = kn; + + kn_name_offset = get_member_offset("kernfs_node", "name", btf_handle); + if (kn_name_offset < 0) + return; + + kn_pa_offset = get_member_offset("kernfs_node", "parent", btf_handle); + if (kn_pa_offset < 0) + return; + + while (pkn) { + kcore_readmem(pkn + kn_name_offset, &knname, sizeof(knname)); + kcore_readmem(knname, subname, sizeof(subname)); + + pos = prepend(&end, &len, subname, strlen(subname), 0); + if (pos) + break; + + kcore_readmem(pkn + kn_pa_offset, &kn, sizeof(kn)); + if ((pkn == kn) || !kn) + break; + pos = prepend(&end, &len, "/", 1, 0); + if (pos) + break; + pkn = kn; + } + + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); + + memmove(name, end, strlen(end) + 1); +#endif +} \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/memcgoffline.c b/source/tools/detect/mem/memcgoffline/memcgoffline.c new file mode 100644 index 0000000000000000000000000000000000000000..16fe17db6a9a1a07ba9605ade1e676fabb2f28fd --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/memcgoffline.c @@ -0,0 +1,187 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kcore_utils.h" +#include "memcg_iter.h" + +static struct btf *btf_handle = NULL; +int total_memcg_num = 0; + +struct environment { + int print_cg_num; /* unused */ +} env = { + .print_cg_num = 10000, +}; + +static int caculate_offline(unsigned long start_memcg) +{ + int offline_num = 0; + unsigned long css, css_flags, cnt, iter = 0; + long refcnt_value; + unsigned int flags_value; + char fileName[PATH_MAX]; + struct member_attribute *css_attr, *css_flag_attr, *refcnt_attr; + struct member_attribute *cnt_attr; + + css_attr = 
get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!css_attr) { + LOG_ERROR("get css offset of mem_cgroup failed!\n"); + return -1; + } + + css_flag_attr = get_offset_no_cache("cgroup_subsys_state", + "flags", btf_handle); + if (!css_flag_attr) { + LOG_ERROR("get flags offset of cgroup_subsys_state failed!\n"); + return -1; + } + + refcnt_attr = get_offset_no_cache("cgroup_subsys_state", + "refcnt", btf_handle); + if (!refcnt_attr) { + LOG_ERROR("get refcnt offset of cgroup_subsys_state failed!\n"); + return -1; + } + + cnt_attr = get_offset_no_cache("percpu_ref", "count", btf_handle); + if (!cnt_attr) { + LOG_ERROR("get cnt offset of percpu_ref failed!\n"); + return -1; + } + + for_each_mem_cgroup(iter, start_memcg, btf_handle) { + css = iter + css_attr->offset; + css_flags = css + css_flag_attr->offset; + + kcore_readmem(css_flags, &flags_value, sizeof(flags_value)); + + if (flags_value & CSS_DYING) { + cnt = css + refcnt_attr->offset + cnt_attr->offset; + + offline_num++; + kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)); + + if (env.print_cg_num > 0) { + memcg_get_name(iter, fileName, PATH_MAX, btf_handle); + printf("cgroup path:%s\trefcount=%ld\n", fileName, refcnt_value); + env.print_cg_num--; + } + } + total_memcg_num++; + } + + return offline_num; +} + +static void show_usage(char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -n PRINT_MAX_CG_NUM Max offline memcg paths to printf(default 10000)\n" + " -h HELP help\n" + " \n" + + " EXAMPLE:\n " + " memcgoffline # display number of offline memcg and all their paths.\n" + " memcgoffline -n 10 # display number of offline memcg and " + "10 of offline memcg paths.\n" + ; + + fprintf(stderr, str, prog); + exit(EXIT_FAILURE); +} + +static int parse_args(int argc, char **argv, struct environment *env) +{ + int c, option_index; + char *prog_name = "memcgoffline"; + + for (;;) { + c = getopt_long(argc, argv, "n:h", NULL, &option_index); + if (c == -1) + break; + + switch 
(c) { + case 'n': + env->print_cg_num = (int)strtol(optarg, NULL, 10); + if (!errno) + return -errno; + break; + case 'h': + show_usage(prog_name); /* would exit */ + break; + default: + show_usage(prog_name); + } + } + + return 0; +} + +struct btf *btf_init() +{ + char *btf_path; + + btf_path = prepare_btf_file(); + if (!btf_path) + return NULL; + + return btf_load(btf_path); +} + +void btf_uninit(struct btf *btf) +{ + return btf__free(btf); +} + +int main(int argc, char *argp[]) +{ + int offline_memcg = 0, ret = 0; + + ret = parse_args(argc, argp, &env); + if (ret) { + LOG_ERROR("parse arg error!\n"); + return -1; + } + + btf_handle = btf_init(); + if (!btf_handle) { + LOG_ERROR("btf init failed!\n"); + return -1; + } + + ret = kcore_init(); + if (ret) { + LOG_ERROR("kcore init failed!\n"); + goto uninit_btf; + } + + ret = memcg_iter_init(); + if (ret) { + LOG_ERROR("memcg_iter_init failed!\n"); + goto uninit_kcore; + } + + offline_memcg = caculate_offline((unsigned long)NULL); + if (offline_memcg < 0) { + LOG_ERROR("caculate offline memcg failed!\n"); + ret = offline_memcg; + goto uninit_kcore; + } + printf("Offline memory cgroup num: %d\n", offline_memcg); + printf("Total memory cgroup num: %d\n", total_memcg_num); + +uninit_kcore: + kcore_uninit(); +uninit_btf: + btf_uninit(btf_handle); + + return ret; +} diff --git a/source/tools/detect/mem/podmem/entry/pagealloc.py b/source/tools/detect/mem/podmem/entry/pagealloc.py index 4b6066b2860027a13f5fde1af09c99194b316afa..5e0ef2b16a4258fc80f5f245a82e28aadd907742 100644 --- a/source/tools/detect/mem/podmem/entry/pagealloc.py +++ b/source/tools/detect/mem/podmem/entry/pagealloc.py @@ -197,7 +197,8 @@ def get_info(meminfo, result,cid): meminfo["podinfo"][podname] = {} meminfo["podinfo"][podname]["podname"] = podname meminfo["podinfo"][podname]["podns"] = podns - meminfo["podinfo"][podname]["mem"] = 0 + meminfo["podinfo"][podname]["rxmem"] = 0 + meminfo["podinfo"][podname]["txmem"] = 0 return podname def 
pagemem_scan(meminfo, ns): @@ -241,7 +242,10 @@ def pagemem_check(meminfo,ns): pid = info[1] task_pid = task+"-"+pid rx = int(line_list[2]) - tx = int(line_list[3]) + if line.find("LISTEN") >= 0: + tx = 0 + else: + tx = int(line_list[3]) rx_mem += rx tx_mem += tx if task_pid not in memTask.keys(): diff --git a/source/tools/detect/mem/podmem/entry/podmem.py b/source/tools/detect/mem/podmem/entry/podmem.py index 1ce46eae7e14a81ae82a4a921220b4e3d6777b8a..8b875a2053f59fa97192c8a33d951d7124072b6f 100644 --- a/source/tools/detect/mem/podmem/entry/podmem.py +++ b/source/tools/detect/mem/podmem/entry/podmem.py @@ -412,7 +412,7 @@ def handle_args(podinfo, argv): print("-j: dump result to json file (sysak podmem -s -j ./test.json)") print("-r: set sample rate ,default set to 1 (sysak podmem -s -r 2)") print("-t: output filecache top ,default for top 10 (sysak podmem -s -t 20)") - print("-m: analysis pod recv-Q memory") + print("-m: analysis pod recv-Q and send-Q memory") sys.exit(2) elif opt == '-r': cmdline['rate'] = int(arg) diff --git a/source/tools/monitor/oomkill/kill.c b/source/tools/monitor/oomkill/kill.c index 7000dc48b155a20972d54314a86b4745047d6e53..11e88db622c6ff94db08220c504493ffbe618b44 100644 --- a/source/tools/monitor/oomkill/kill.c +++ b/source/tools/monitor/oomkill/kill.c @@ -427,6 +427,8 @@ procinfo_t find_largest_process(const poll_loop_args_t* args) */ void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victim) { + char cmdline[512]; + if (victim->pid <= 0) { warn("Could not find a process to kill. Sleeping 1 second.\n"); if (args->notify) { @@ -444,10 +446,12 @@ void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victi } else if (sig == 0) { sig_name = "0 (no-op signal)"; } + get_cmdline(victim->pid, cmdline, sizeof(cmdline)); // sig == 0 is used as a self-test during startup. Don't notify the user. 
if (sig != 0 || enable_debug) { warn("sending %s to process %d uid %d \"%s\": badness %ld, VmRSS %lld MiB adj:%d\n", sig_name, victim->pid, victim->uid, victim->name, victim->badness, victim->VmRSSkiB / 1024, victim->oom_score_adj); + warn("process cmdline:%s\n", cmdline); } int res = kill_wait(args, victim->pid, sig); diff --git a/source/tools/monitor/oomkill/meminfo.c b/source/tools/monitor/oomkill/meminfo.c index 4f02e886208b9cc47a9d896b708b578d86fcabf6..18bab6b1e072aa2e71431ba7ef42c673164fb67b 100644 --- a/source/tools/monitor/oomkill/meminfo.c +++ b/source/tools/monitor/oomkill/meminfo.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include "globals.h" #include "meminfo.h" @@ -253,6 +256,44 @@ int get_comm(int pid, char* out, size_t outlen) return 0; } +int get_cmdline(int pid, char *out, int out_len) { + char cmdline_file[128] = {0}; + int fd; + int i; + ssize_t nread, total = 0; + + sprintf(cmdline_file, "%s/%d/cmdline", procdir_path, pid); + + fd = open(cmdline_file, O_RDONLY); + if (fd == -1) { + printf("Failed to open %s\n", cmdline_file); + return -1; + } + + while ((nread = read(fd, out + total, out_len - total - 1)) > 0) { + total += nread; + } + + close(fd); + + if (total == -1) { + printf("Failed to read %s\n", cmdline_file); + return -1; + } + + // Replace '\0' with spaces when arguments are not separated by '\0' + for (i = 0; i < total; i++) { + if (out[i] == '\0') { + out[i] = ' '; + } + } + + out[total] = '\0'; + + return 0; +} + + // Get the effective uid (EUID) of `pid`. // Returns the uid (>= 0) or -errno on error. 
int get_uid(int pid) diff --git a/source/tools/monitor/oomkill/meminfo.h b/source/tools/monitor/oomkill/meminfo.h index 22df1ef5af066bc7063138c140fe49ceabe3a732..4572b402065d40e874c9f478a0a9632c947cca6f 100644 --- a/source/tools/monitor/oomkill/meminfo.h +++ b/source/tools/monitor/oomkill/meminfo.h @@ -61,6 +61,7 @@ int get_oom_score(int pid); int get_oom_score_adj(const int pid, int* out); long long get_vm_rss_kib(int pid); int get_comm(int pid, char* out, size_t outlen); +int get_cmdline(int pid, char *out, int out_len); int get_uid(int pid); #endif diff --git a/source/tools/monitor/unity/collector/loop.lua b/source/tools/monitor/unity/collector/loop.lua index 47e793ded8ae19fe1eb7a5740ca2b9d2206d828f..68b19a557bd4eedd50ed18326bd38de9c4e89ba6 100644 --- a/source/tools/monitor/unity/collector/loop.lua +++ b/source/tools/monitor/unity/collector/loop.lua @@ -16,7 +16,7 @@ local CguardDaemon = require("collector.guard.guardDaemon") local CguardSelfStat = require("collector.guard.guardSelfStat") local CpostPlugin = require("collector.postPlugin.postPlugin") local CforkRun = require("collector.execEngine.forkRun") ----local CpodFilter = require("collector.podMan.podFilter") +local CpodFilter = require("collector.podMan.podFilter") local CpodsAll = require("collector.podMan.podsAll") local Cloop = class("loop") @@ -52,11 +52,15 @@ function Cloop:loadLuaPlugin(res, proc_path, procffi) end end if res.container then - ---self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path) - ---self._names[c] = "podFilter" - self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path) - self._names[c] = "podMon" - + if res.container.mode == "cgroup" then + --print("mods1="..res.container.mode) + self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path) + self._names[c] = "podFilter" + else + --print("mods2="..res.container.mode) + self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path) + self._names[c] = "podMon" + end end print("add " .. 
system:keyCount(self._procs) .. " lua plugin.") end diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h index f42a07526cf9de13fde141c620607b025c5c5a42..e3cc818b1fcbfeaa908ec4549a5d1d213a731176 100644 --- a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h +++ b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h @@ -1,5 +1,5 @@ -#ifndef UNITY_SAMPLE_H -#define UNITY_SAMPLE_H +#ifndef UNITY_IMC_LATENCY_H +#define UNITY_IMC_LATENCY_H #include #include @@ -123,4 +123,4 @@ struct topology_ent { int64_t socket_id; }; -#endif // UNITY_SAMPLE_H +#endif diff --git a/source/tools/monitor/unity/collector/proc_stat.lua b/source/tools/monitor/unity/collector/proc_stat.lua index e5834d75d8f3ad3d49670a55bb71205005fa3b78..dbdec60beadc78c5404c8dedf41e94cc57e299ef 100644 --- a/source/tools/monitor/unity/collector/proc_stat.lua +++ b/source/tools/monitor/unity/collector/proc_stat.lua @@ -14,6 +14,9 @@ function CprocStat:_init_(proto, pffi, mnt, pFile) CvProc._init_(self, proto, pffi, mnt,pFile or "proc/stat") self._funs = self:setupTable() self._cpuArr = {} + self._total_warn = 0 + self._sys_warn = 0 + self._user_warn = 0 end function CprocStat:_cpuHead() @@ -23,6 +26,12 @@ end function CprocStat:_procCpu(now, last) if last then + local user_thresh = 40 + local sys_thresh = 25 + local total_thresh = 55 + local user_util = 0 + local sys_util = 0 + local warn = 0 local vs = {} local sum = 0 local index = self:_cpuHead() @@ -37,10 +46,39 @@ function CprocStat:_procCpu(now, last) local total = tonumber(sum) for i = 1, #vs do local v = tonumber(vs[i]) + + --for warn events + if index[i] == "user" or index[i] == "nice" then + user_util = user_util + v*100.0/total + end + if index[i] == "sys" or index[i] == "softirq" then + sys_util = sys_util + v*100.0/total + end + if index[i] == "idle" then + total_util = 100 - (v*100.0/total) + end + local cell = 
{name=index[i], value=tonumber(v * 100.0 / total)} table.insert(res, cell) end table.insert(res, {name="total", value=total}) + --warn events + if user_util > user_thresh then + self._user_warn = self._user_warn + 1 + end + local cell0 = {name="usr_warn", value=self._user_warn} + table.insert(res, cell0) + if sys_util > sys_thresh then + self._sys_warn = self._sys_warn + 1 + end + local cell1 = {name="sys_warn", value=self._sys_warn} + table.insert(res, cell1) + if total_util > user_thresh then + self._total_warn = self._total_warn + 1 + end + local cell2 = {name="total_warn", value=self._total_warn} + table.insert(res, cell2) + return res end end diff --git a/source/tools/monitor/unity/etc/base.yaml b/source/tools/monitor/unity/etc/base.yaml index c784e6423989083471f048adcbc76669d7e6cd32..9350c344463efd2157cbe1163a849c6e24a1c578 100644 --- a/source/tools/monitor/unity/etc/base.yaml +++ b/source/tools/monitor/unity/etc/base.yaml @@ -49,92 +49,92 @@ plugins: metrics: - - title: sysak_proc_cpu_total + title: sysom_proc_cpu_total from: cpu_total head: mode help: "cpu usage info for total." type: "gauge" - - title: sysak_proc_cpus + - title: sysom_proc_cpus from: cpus head: mode help: "cpu usage info for per-cpu." type: "gauge" - - title: sysak_proc_sirq + - title: sysom_proc_sirq from: sirq head: type help: "system soft irq times." type: "gauge" - - title: sysak_proc_stat_counters + - title: sysom_proc_stat_counters from: stat_counters head: counter help: "system state counter." type: "gauge" - - title: sysak_proc_meminfo + - title: sysom_proc_meminfo from: meminfo head: value help: "meminfo from /proc/meminfo." type: "gauge" - - title: sysak_proc_vmstat + - title: sysom_proc_vmstat from: vmstat head: value help: "vmstat info from /proc/vmstat." type: "gauge" - - title: sysak_proc_self_statm + - title: sysom_proc_self_statm from: self_statm head: value help: "statm info from /proc/self/statm." 
type: "gauge" - - title: sysak_proc_networks + - title: sysom_proc_networks from: networks head: counter help: "networks info from /proc/net/dev." type: "gauge" - - title: sysak_proc_disks + - title: sysom_proc_disks from: disks head: counter help: "disk info from /proc/diskstats." type: "gauge" - - title: sysak_proc_pkt_status + - title: sysom_proc_pkt_status from: pkt_status head: counter help: "net status info from /proc/net/snmp and /proc/net/status." type: "gauge" - - title: sysak_fs_stat + - title: sysom_fs_stat from: fs_stat head: counter help: "file system information." type: "gauge" - - title: sysak_sock_stat + - title: sysom_sock_stat from: sock_stat head: value help: "sock stat counters from /proc/net/sockstat" type: "gauge" - - title: sysak_proc_schedstat + - title: sysom_proc_schedstat from: proc_schedstat head: value help: "schedule state of percpu." type: "gauge" - - title: sysak_proc_loadavg + - title: sysom_proc_loadavg from: proc_loadavg head: value help: "loadavg of system from /proc/loadavg" type: "gauge" - - title: sysak_proc_buddyinfo + - title: sysom_proc_buddyinfo from: buddyinfo head: value help: "buddyinfo of system from /proc/buddyinfo" type: "gauge" - - title: sysak_IOMonIndForDisksIO + - title: sysom_IOMonIndForDisksIO from: IOMonIndForDisksIO head: value help: "Disk IO indicators and abnormal events" type: "gauge" - - title: sysak_IOMonIndForSystemIO + - title: sysom_IOMonIndForSystemIO from: IOMonIndForSystemIO head: value help: "System indicators and abnormal events about IO" type: "gauge" - - title: sysak_IOMonDiagLog + - title: sysom_IOMonDiagLog from: IOMonDiagLog head: value help: "Diagnose log for IO exception" @@ -144,87 +144,87 @@ metrics: head: value help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling" type: "gauge" - - title: sysak_cpu_dist + - title: sysom_cpu_dist from: cpu_dist head: value help: "task cpu sched dist." 
type: "gauge" - - title: sysak_net_health_hist + - title: sysom_net_health_hist from: net_health_hist head: value help: "net_health_hist" type: "gauge" - - title: sysak_net_health_count + - title: sysom_net_health_count from: net_health_count head: value help: "net_health_count" type: "gauge" - - title: sysak_net_retrans_count + - title: sysom_net_retrans_count from: net_retrans_count head: value help: "net_retrans_count" type: "gauge" - - title: sysak_gpuinfo + - title: sysom_gpuinfo from: gpuinfo head: value help: "gpuinfo of system from nvidia-smi" type: "gauge" - - title: sysak_uname + - title: sysom_uname from: uname head: value help: "uname info" type: "gauge" - - title: sysak_uptime + - title: sysom_uptime from: uptime head: value help: "uptime from /proc/uptime" type: "gauge" - - title: sysak_system_release + - title: sysom_system_release from: system_release head: value help: "system_release from /etc/os-release" type: "gauge" - - title: sysak_cgroups + - title: sysom_cgroups from: cgroups head: value help: "cgroup number." type: "gauge" - - title: sysak_per_sirqs + - title: sysom_per_sirqs from: per_sirqs head: value help: "per_sirqs." type: "gauge" - - title: sysak_softnets + - title: sysom_softnets from: softnets head: value help: "cgroup number." type: "gauge" - - title: sysak_interrupts + - title: sysom_interrupts from: interrupts head: value help: "interrupts." 
type: "gauge" - - title: sysak_net_ip_count + - title: sysom_net_ip_count from: net_ip_count head: value help: "net snmp net_ip_count" type: "gauge" - - title: sysak_net_icmp_count + - title: sysom_net_icmp_count from: net_icmp_count head: value help: "net snmp net_icmp_count" type: "gauge" - - title: sysak_net_udp_count + - title: sysom_net_udp_count from: net_udp_count head: value help: "net snmp net_udp_count" type: "gauge" - - title: sysak_net_tcp_count + - title: sysom_net_tcp_count from: net_tcp_count head: value help: "net snmp net_tcp_count" type: "gauge" - - title: sysak_net_tcp_ext_count + - title: sysom_net_tcp_ext_count from: net_tcp_ext_count head: value help: "net stat net_tcp_ext_count" diff --git a/source/tools/monitor/unity/etc/group.yaml b/source/tools/monitor/unity/etc/group.yaml index 3208ecc2089cf7b950584ddd1448c0af01955678..6e3d830285bb20d511982ee766d76a5f1ec6cf5c 100644 --- a/source/tools/monitor/unity/etc/group.yaml +++ b/source/tools/monitor/unity/etc/group.yaml @@ -62,33 +62,33 @@ plugins: description: "summary retrans out put." metrics: - - title: sysak_proc_pkt_status + - title: sysom_proc_pkt_status from: pkt_status head: counter help: "net status info from /proc/net/snmp and /proc/net/status." 
type: "gauge" - - title: sysak_net_health_hist + - title: sysom_net_health_hist from: net_health_hist head: value help: "net_health_hist" type: "gauge" - - title: sysak_net_health_count + - title: sysom_net_health_count from: net_health_count head: value help: "net_health_count" type: "gauge" - - title: sysak_net_retrans_count + - title: sysom_net_retrans_count from: net_retrans_count head: value help: "net_retrans_count" type: "gauge" - - title: sysak_virtout_dist + - title: sysom_virtout_dist from: virtout_dist head: value - help: "sysak_virtout_dist" + help: "sysom_virtout_dist" type: "gauge" - - title: sysak_retrans + - title: sysom_retrans from: retrans head: value - help: "sysak_retrans" + help: "sysom_retrans" type: "gauge" \ No newline at end of file diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml index f73d003b926edac23391bc3505c4930c01c71ddc..bddd986428f66b37318da80e98e8c08c43f5dc90 100644 --- a/source/tools/monitor/unity/etc/k8s.yaml +++ b/source/tools/monitor/unity/etc/k8s.yaml @@ -25,7 +25,11 @@ outline: container: mode: "pods" - luaPlugin: ["cg_cpu_cfs_quota","cg_mem_drcm_glob_latency","cg_memory_util","cg_cpu_stat_sample", "cg_cpuacct_stat","cg_memory_drcm_latency", "cg_memory_fail_cnt","cg_memory_dcmp_latency"] + #mode:"cgroup" + #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat + luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency", + "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat", + "cg_cpu_stat", "cg_pmu_events", "cg_cpu_cfs_quota", "cg_mem_drcm_glob_latency"] directCgPath: - "/" - "/kubepods.slice" @@ -277,13 +281,12 @@ metrics: head: value help: "sysom_cg_memory_util" type: "gauge" - - title: sysom_cg_memgdrcm_latency + - title: sysom_cg_mem_glob_drcm_latency from: cgGlbDrcmLatency head: value help: "sysom global memory latency" type: "gauge" - - - title: sysom_cg_memdrcm_latency + - title: sysom_cg_mem_drcm_latency from: 
cg_memdrcm_latency head: value help: "sysom_cg_memdrcm_latency" @@ -293,6 +296,26 @@ metrics: head: value help: "sysom_cg_memmcmp_latency" type: "gauge" + - title: sysom_cg_mem_dcmp_latency + from: cg_memdcmp_latency + head: value + help: "sysom_cg_mem_dcmp_latency" + type: "gauge" + - title: sysom_cg_cpuacct_wait_latency + from: cg_wait_latency + head: value + help: "sysom_cg_cpuacct_wait_latency" + type: "gauge" + - title: sysom_cg_cpuacct_proc_stat + from: cg_cpuacct_proc_stat + head: value + help: "sysom_cg_cpuacct_proc_stat" + type: "gauge" + - title: sysom_cg_cpu_quota + from: cgCpuQuota + head: value + help: "quota_us,period_us and quota/period" + type: "gauge" - title: sysom_cg_cpu_stat from: cg_cpu_stat head: value @@ -303,8 +326,8 @@ metrics: head: value help: "cpuacct/cpuacct.stat" type: "gauge" - - title: sysom_cg_cfs_quota - from: cgCpuQuota + - title: sysom_cg_pmu_events + from: pmu_cg_events head: value - help: "cfs quota" + help: "pmu events of cgroups" type: "gauge"