diff --git a/rpm/sysak.service b/rpm/sysak.service
index 4379ea4c05488085a735688e32f25a5e31a28bf3..447ecd207aabe57ae2ee30ebfcb09992de148048 100644
--- a/rpm/sysak.service
+++ b/rpm/sysak.service
@@ -7,7 +7,7 @@
   Restart=always
   RestartSec=10
   CPUQuota=30%
-  MemoryLimit=60M
+  MemoryLimit=300M
   ExecStart=/usr/local/sysak/.sysak_components/tools/dist/app/beeQ/run.sh
   ExecStop=kill -9 $(pidof unity-mon)
   ExecReload=kill -1 $(pidof unity-mon)
diff --git a/source/lib/uapi/Makefile b/source/lib/uapi/Makefile
index b92c5bdc3c5dd5cc0577a892fc990fdfd2f4a455..d267d7a7b02dce65cdaa523a78b12f6af03124d3 100644
--- a/source/lib/uapi/Makefile
+++ b/source/lib/uapi/Makefile
@@ -1,6 +1,7 @@
 SOURCE := $(shell find . -name "*.c")
 OBJS :=$(patsubst %.c,%.o,$(SOURCE))
 STATIC_OBJS := $(addprefix $(OBJPATH)/,$(OBJS))
+LIBS += -L /usr/lib64 -l:libelf.a
 
 libsysak: $(OBJ_LIB_PATH)/libsysak.a
 
@@ -10,6 +11,6 @@ $(OBJ_LIB_PATH)/libsysak.a: $(STATIC_OBJS)
 $(STATIC_OBJS): $(OBJS)
 
 $(OBJS): %.o : %.c
-	gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include
+	gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include $(LIBS)
 
 
diff --git a/source/lib/uapi/include/kcore_utils.h b/source/lib/uapi/include/kcore_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..a877ed9c9e7d3b5f347775c560ac53f68475cb2a
--- /dev/null
+++ b/source/lib/uapi/include/kcore_utils.h
@@ -0,0 +1,70 @@
+/* Include guard: the macro tested and the macro defined must be the same. */
+#ifndef __KCORE_UTILS_H
+#define __KCORE_UTILS_H
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <elf.h>
+
+#define BUFF_MAX		4096
+#define MAX_KCORE_ELF_HEADER_SIZE   32768
+
+#ifdef DEBUG
+#define LOG_DEBUG(...)	fprintf(stderr, __VA_ARGS__)
+#else
+#define LOG_DEBUG(...)	do { } while (0)
+#endif /* DEBUG */
+
+#define LOG_INFO(...)	fprintf(stdout, __VA_ARGS__)
+#define LOG_WARN(...)	fprintf(stderr, __VA_ARGS__)
+#define LOG_ERROR(...)	fprintf(stderr, __VA_ARGS__)
+
+#define MIN(a,b)       (((a)<(b))?(a):(b))
+#define MAX(a,b)       (((a)>(b))?(a):(b))
+
+/*
+ * Cached view of the /proc/kcore ELF file.
+ *
+ * Filled in by kcore_init() and consulted by kcore_readmem(). The 32-bit and
+ * vmcoreinfo members are not touched by kcore_utils.c as visible here.
+ */
+struct proc_kcore_data {
+	unsigned int flags;
+	unsigned int segments;		/* e_phnum - 1: program headers after the first one */
+	char *elf_header;		/* malloc()ed copy of the ELF header area */
+	size_t header_size;		/* number of bytes held in elf_header */
+	Elf64_Phdr *load64;		/* second program header inside elf_header */
+	Elf64_Phdr *notes64;		/* first program header inside elf_header */
+	Elf32_Phdr *load32;
+	Elf32_Phdr *notes32;
+	void *vmcoreinfo;
+	unsigned int size_vmcoreinfo;
+};
+
+
+/**
+ * lookup_kernel_symbol - look up a kernel symbol address in /proc/kallsyms
+ *
+ * @symbol_name: kernel symbol name to look up.
+ * @return: the address of the kernel symbol, or (uintptr_t)-1 when the file
+ *          cannot be opened or the symbol is not found.
+ */
+uintptr_t lookup_kernel_symbol(const char *symbol_name);
+
+/*
+ * prepare_btf_file - check whether the BTF file exists; if not, download it.
+ *
+ * Returns a pointer to a static buffer holding the file path, or NULL when
+ * the file is still unusable after the download attempt.
+ */
+char *prepare_btf_file();
+
+/*
+ * Open /proc/kcore and read the data needed to interpret it.
+ * Returns 0 on success and -1 on failure.
+ */
+int kcore_init();
+
+/* Close /proc/kcore and release what kcore_init() allocated. */
+void kcore_uninit();
+
+/**
+ * kcore_readmem - read data at a kernel virtual address from /proc/kcore
+ *
+ * @kvaddr: kernel address to read.
+ * @buf: buffer receiving the data.
+ * @size: number of bytes to read.
+ * @return: the number of bytes read on success, -1 on failure.
+ *
+ * Note: must be called after kcore_init().
+ */
+ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size);
+
+
+#endif
\ No newline at end of file
diff --git a/source/lib/uapi/kcore_utils.c b/source/lib/uapi/kcore_utils.c
new file mode 100644
index 0000000000000000000000000000000000000000..e58b45c21151eae4508d706367b3036a5b02ed71
--- /dev/null
+++ b/source/lib/uapi/kcore_utils.c
@@ -0,0 +1,295 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <fcntl.h>
+
+#include "kcore_utils.h"
+
+#define LEN             (128)
+
+static struct proc_kcore_data proc_kcore_data = { 0 };
+static struct proc_kcore_data *pkd = &proc_kcore_data;
+
+static int kcore_fd = 0;
+
+/*
+ * Routines of kcore, i.e., /proc/kcore
+ */
+/**
+ * lookup_kernel_symbol - resolve a kernel symbol through /proc/kallsyms
+ *
+ * @symbol_name: exact symbol name to search for.
+ * @return: the symbol address, or (uintptr_t)-1 if the name is NULL/empty,
+ *          the file cannot be opened, or no line carries that symbol.
+ *
+ * Each line has the form "<hex address> <type> <name>[\t[module]]". The
+ * name column is tokenised explicitly instead of being located with
+ * strstr(): a name made only of hex digits can then no longer be mistaken
+ * for part of the address column, and module symbols (which carry a
+ * trailing "[module]" field) match as well.
+ */
+uintptr_t lookup_kernel_symbol(const char *symbol_name)
+{
+	const char *kallsyms_file = "/proc/kallsyms";
+	FILE *fp;
+	char line[BUFF_MAX];
+	size_t want;
+	uintptr_t addr = -1UL;
+
+	if (symbol_name == NULL || *symbol_name == '\0') {
+		LOG_ERROR("failed to lookup symbol: %s\n",
+			  symbol_name ? symbol_name : "(null)");
+		return addr;
+	}
+	want = strlen(symbol_name);
+
+	fp = fopen(kallsyms_file, "r");
+	if (fp == NULL) {
+		perror("fopen: /proc/kallsyms");
+		return -1;
+	}
+
+	while (fgets(line, sizeof(line), fp)) {
+		char *end;
+		const char *name;
+		unsigned long value;
+
+		/* Column 1: address in hex */
+		value = strtoul(line, &end, 16);
+		if (end == line)
+			continue;
+
+		/* Column 2: one-character symbol type */
+		name = end + strspn(end, " \t");
+		if (*name == '\0' || *name == '\n')
+			continue;
+		name++;
+
+		/* Column 3: symbol name, ends at the next blank or newline */
+		name += strspn(name, " \t");
+		if (strcspn(name, " \t\n") != want)
+			continue;
+		if (strncmp(name, symbol_name, want) == 0) {
+			addr = value;
+			break;
+		}
+	}
+
+	if (addr == -1UL)
+		LOG_ERROR("failed to lookup symbol: %s\n", symbol_name);
+
+	fclose(fp);
+	return addr;
+}
+
+/*
+ * kcore_elf_init - cache the ELF header area of /proc/kcore in pkd
+ *
+ * Reads the first MAX_KCORE_ELF_HEADER_SIZE bytes from kcore_fd, works out
+ * how large the header area really is, and stores a heap copy of it in
+ * pkd->elf_header. pkd->notes64 points at the first program header and
+ * pkd->load64 at the second one.
+ *
+ * Returns 0 on success, -1 on failure (nothing stays allocated on failure).
+ */
+static int kcore_elf_init()
+{
+	Elf64_Ehdr *elf64;
+	Elf64_Phdr *load64;
+	Elf64_Phdr *notes64;
+	char eheader[MAX_KCORE_ELF_HEADER_SIZE];
+	size_t load_size, notes_size;
+
+	if (read(kcore_fd, eheader, MAX_KCORE_ELF_HEADER_SIZE) !=
+			MAX_KCORE_ELF_HEADER_SIZE) {
+		perror("read: /proc/kcore ELF header");
+		return -1;
+	}
+
+	elf64 = (Elf64_Ehdr *)&eheader[0];
+	notes64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr)];
+	load64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr) +
+					sizeof(Elf64_Phdr)];
+
+	/* "e_phnum - 1" below would wrap to UINT_MAX segments otherwise */
+	if (elf64->e_phnum == 0) {
+		LOG_ERROR("/proc/kcore: ELF header has no program headers\n");
+		return -1;
+	}
+	pkd->segments = elf64->e_phnum - 1;
+
+	notes_size = load_size = 0;
+	if (notes64->p_type == PT_NOTE)
+		notes_size = notes64->p_offset + notes64->p_filesz;
+	/* taken when the first program header is a PT_LOAD instead of a note */
+	if (notes64->p_type == PT_LOAD)
+		load_size = (unsigned long)(load64+(elf64->e_phnum)) -
+				(unsigned long)elf64;
+
+	pkd->header_size = MAX(notes_size, load_size);
+	if (!pkd->header_size)
+		pkd->header_size = MAX_KCORE_ELF_HEADER_SIZE;
+
+	if ((pkd->elf_header = (char *)malloc(pkd->header_size)) == NULL) {
+		perror("malloc: /proc/kcore ELF header");
+		return -1;
+	}
+
+	if (pkd->header_size <= MAX_KCORE_ELF_HEADER_SIZE) {
+		memcpy(&pkd->elf_header[0], &eheader[0], pkd->header_size);
+	} else if (lseek(kcore_fd, 0, SEEK_SET) < 0 ||
+		   read(kcore_fd, pkd->elf_header, pkd->header_size) !=
+				(ssize_t)pkd->header_size) {
+		/*
+		 * The header area is larger than the stack buffer: copying
+		 * from eheader would read past its end, so it is re-read
+		 * from the file instead.
+		 */
+		perror("read: /proc/kcore ELF header");
+		free(pkd->elf_header);
+		pkd->elf_header = NULL;
+		return -1;
+	}
+
+	pkd->notes64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr)];
+	pkd->load64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr) +
+						     sizeof(Elf64_Phdr)];
+
+	return 0;
+}
+
+/*
+ * kcore_init - open /proc/kcore and cache its ELF header
+ *
+ * Returns 0 on success, -1 on failure. On failure the descriptor is closed
+ * and kcore_fd is reset so that a later kcore_uninit() does not close the
+ * same descriptor number a second time.
+ */
+int kcore_init()
+{
+	if ((kcore_fd = open("/proc/kcore", O_RDONLY)) < 0) {
+		perror("open: /proc/kcore");
+		return -1;
+	}
+
+	if (kcore_elf_init()) {
+		close(kcore_fd);
+		kcore_fd = -1;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * kcore_uninit - release everything kcore_init() acquired
+ *
+ * Safe to call more than once: the cached header pointers and the segment
+ * count are cleared, and the descriptor is reset after being closed.
+ */
+void kcore_uninit(void)
+{
+	free(pkd->elf_header);
+	pkd->elf_header = NULL;
+	pkd->notes64 = NULL;
+	pkd->load64 = NULL;
+	pkd->header_size = 0;
+	/* with zero segments kcore_readmem() fails before touching load64 */
+	pkd->segments = 0;
+
+	if (kcore_fd > 0) {
+		close(kcore_fd);
+		kcore_fd = -1;
+	}
+}
+
+/*
+ * kcore_readmem - read @size bytes at kernel virtual address @kvaddr
+ *
+ * The address is translated to a file offset through the program header
+ * whose [p_vaddr, p_vaddr + p_memsz) range contains it; the data is then
+ * read from kcore_fd at that offset. Returns the number of bytes read, or
+ * -1 when no segment covers the address, the seek fails, or fewer than
+ * @size bytes are read.
+ *
+ * We may accidentally access invalid pfns on some kernels
+ * like 4.9, due to known bugs. Just skip it.
+ */
+ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size)
+{
+	Elf64_Phdr *seg;
+	unsigned long file_off = -1UL;
+	unsigned int idx, n;
+	ssize_t got;
+
+	for (idx = 0; idx < pkd->segments; idx++) {
+		seg = &pkd->load64[idx];
+		if (kvaddr < seg->p_vaddr ||
+		    kvaddr >= (seg->p_vaddr + seg->p_memsz))
+			continue;
+
+		file_off = (off_t)(kvaddr - seg->p_vaddr) +
+				(off_t)seg->p_offset;
+		break;
+	}
+
+	/* No segment matched: dump the known ranges (debug builds only) */
+	if (idx == pkd->segments) {
+		for (n = 0; n < pkd->segments; n++) {
+			seg = &pkd->load64[n];
+			LOG_DEBUG("%2d: [0x%lx, 0x%lx)\n", n, seg->p_vaddr,
+					seg->p_vaddr + seg->p_memsz);
+		}
+		return -1;
+	}
+
+	if (lseek(kcore_fd, file_off, SEEK_SET) < 0) {
+		perror("lseek: /proc/kcore");
+		return -1;
+	}
+
+	got = read(kcore_fd, buf, size);
+	if (got < size) {
+		perror("read: /proc/kcore");
+		return -1;
+	}
+
+	return got;
+}
+
+/*
+ * stripWhiteSpace - remove every ' ', '\t' and '\n' from @str, in place
+ *
+ * The string is compacted inside its own storage. The previous version
+ * copied through a temporary array of strlen(str) bytes, which had no room
+ * for the terminating NUL when nothing was stripped and had length zero
+ * for an empty string.
+ */
+static void stripWhiteSpace(char *str)
+{
+    char *dst = str;
+    const char *src;
+
+    if (str == NULL)
+        return;
+
+    for (src = str; *src != '\0'; src++) {
+        if (*src != ' ' && *src != '\t' && *src != '\n')
+            *dst++ = *src;
+    }
+
+    *dst = '\0';
+}
+
+/*
+ * do_cmd - run @cmd through popen() and return its first output line
+ *
+ * @cmd: shell command to run.
+ * @result: receives the first line of output with all blanks, tabs and
+ *          newlines removed; always NUL-terminated when @len > 0.
+ * @len: size of @result in bytes.
+ *
+ * Returns 0 when the command could be started, -1 when popen() failed.
+ * The pipe is closed on every path, and an empty or unreadable output
+ * yields an empty string.
+ */
+static int do_cmd(const char *cmd, char *result, int len)
+{
+    FILE *res;
+    char region[LEN] = {0};
+
+    res = popen(cmd, "r");
+    if (res == NULL) {
+        printf("get region id failed\n");
+        return -1;
+    }
+
+    if (feof(res)) {
+        printf("cmd line end\n");
+        pclose(res);
+        return 0;
+    }
+
+    /* on failure the buffer contents are unspecified, so clear them */
+    if (fgets(region, sizeof(region)-1, res) == NULL)
+        region[0] = '\0';
+    stripWhiteSpace(region);
+
+    if (result != NULL && len > 0)
+        snprintf(result, len, "%s", region);
+
+    pclose(res);
+    return 0;
+}
+
+/*
+ * download_btf - fetch the vmlinux BTF file for the running kernel
+ *
+ * The region comes from the instance metadata service; when the answer
+ * does not start with "cn-" the region falls back to cn-hangzhou and the
+ * "-internal" endpoint suffix is dropped. The file is stored under
+ * $SYSAK_WORK_PATH/tools/<kernel>/ when that variable is set, under /boot
+ * otherwise.
+ *
+ * Returns 0 once the wget command has been issued (its outcome is not
+ * checked here) and -1 when a path or the command does not fit its buffer.
+ *
+ * NOTE(review): SYSAK_WORK_PATH is pasted into a shell command unquoted;
+ * confirm that the variable is always trusted.
+ */
+static int download_btf()
+{
+    char region[LEN] = {0};
+    char arch[LEN] = {0};
+    char kernel[LEN] = {0};
+    char dw[BUFF_MAX] = {0};
+    char timeout[LEN] = "-internal";
+    char sysak_path[BUFF_MAX] = "/boot";
+    char *curl_cmd = "curl -s --connect-timeout 2 http://100.100.100.200/latest/meta-data/region-id 2>&1";
+    char *arch_cmd = "uname -m";
+    char *kernel_cmd = "uname -r";
+    char *tmp;
+    int n;
+
+    do_cmd(curl_cmd, region, LEN);
+    /* &region[3] below is only meaningful when "cn-" is a prefix */
+    if (strncmp(region, "cn-", 3) != 0) {
+        strcpy(region, "cn-hangzhou");
+        memset(timeout, 0, sizeof(timeout));
+    }
+
+    do_cmd(arch_cmd, arch, LEN);
+
+    do_cmd(kernel_cmd, kernel, LEN);
+
+    if ((tmp = getenv("SYSAK_WORK_PATH")) != NULL) {
+        n = snprintf(sysak_path, sizeof(sysak_path), "%s/tools/%s", tmp, kernel);
+        if (n < 0 || (size_t)n >= sizeof(sysak_path)) {
+            LOG_ERROR("SYSAK_WORK_PATH is too long\n");
+            return -1;
+        }
+    }
+
+    /* the size passed must be the real size of dw */
+    n = snprintf(dw, sizeof(dw), "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s", sysak_path, kernel, &region[3],&region[3], timeout,arch, kernel);
+    if (n < 0 || (size_t)n >= sizeof(dw)) {
+        LOG_ERROR("btf download command is too long\n");
+        return -1;
+    }
+
+    do_cmd(dw, kernel, LEN);
+    return 0;
+}
+
+/*
+ * check_btf_file - tell whether @btf looks like a usable BTF file
+ *
+ * Returns 0 when the path can be stat()ed and is at least 10 KiB large,
+ * -1 otherwise.
+ */
+static int check_btf_file(char *btf)
+{
+    struct stat info;
+
+    if (stat(btf, &info) != 0)
+        return -1;
+
+    return (info.st_size < 10*1024) ? -1 : 0;
+}
+
+/*
+ * prepare_btf_file - locate the BTF file of the running kernel
+ *
+ * The expected location is $SYSAK_WORK_PATH/tools/<ver>/vmlinux-<ver> when
+ * SYSAK_WORK_PATH is set, /boot/vmlinux-<ver> otherwise, <ver> being the
+ * output of "uname -r". A missing or too small file triggers one download
+ * attempt.
+ *
+ * Returns a pointer to a static buffer holding the path (overwritten by
+ * the next call), or NULL when the path does not fit the buffer or the
+ * file is still unusable after the download attempt.
+ */
+char *prepare_btf_file()
+{
+    static char btf[BUFF_MAX] = {0};
+    char ver[LEN] = {0};
+    char *cmd = "uname -r";
+    const char *work_path = getenv("SYSAK_WORK_PATH");
+    int n;
+
+    do_cmd(cmd, ver, LEN);
+
+    /* bounded formatting: the environment value has arbitrary length */
+    if (work_path != NULL)
+        n = snprintf(btf, sizeof(btf), "%s/tools/%s/vmlinux-%s", work_path, ver, ver);
+    else
+        n = snprintf(btf, sizeof(btf), "/boot/vmlinux-%s", ver);
+    if (n < 0 || (size_t)n >= sizeof(btf)) {
+        LOG_ERROR("btf file path is too long\n");
+        return NULL;
+    }
+
+    if (check_btf_file(btf))
+        download_btf();
+
+    if (check_btf_file(btf)) {
+        LOG_ERROR("btf file:%s not found \n", btf);
+        return NULL;
+    }
+
+    return btf;
+}
\ No newline at end of file
diff --git a/source/mk/csrc.mk b/source/mk/csrc.mk
index 8701dc77e9f58e4eaeca71a4824b2757a5260940..e3fdf57c3f9cb6a6063603c121518c2af76e2fc0 100644
--- a/source/mk/csrc.mk
+++ b/source/mk/csrc.mk
@@ -1,6 +1,6 @@
 objs := $(foreach n, $(mods), $(OBJPATH)/$(n))
 
-CFLAGS += $(EXTRA_CFLAGS) -I$(SRC)/lib/uapi/include
+CFLAGS += $(EXTRA_CFLAGS) $(INCLUDES) -I$(SRC)/lib/uapi/include
 LDFLAGS += $(EXTRA_LDFLAGS)
 
 ifeq ($(KERNEL_DEPEND), Y)
diff --git a/source/tools/detect/mem/imc_latency/Makefile b/source/tools/detect/mem/imc_latency/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..282f8bfda3eed0884e233ad5b23a4f9db6cd8dd6
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/Makefile
@@ -0,0 +1,4 @@
+target := imc_latency
+mods := imc_latency.o
+
+include $(SRC)/mk/csrc.mk
diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..90c46acd3eb97677185b6b2331ca9c6006e63cfa
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/README.md
@@ -0,0 +1,82 @@
+# imc_latency
+
+基于PMU事件的DDR内存访问延迟，用于检查微架构层级是否存在内存竞争。
+
+## Usage
+
+### 使用用例
+
+```bash
+Sample:
+
+imc_latency -f /dev/stdout #输出日志到控制台
+imc_latency -d 15 -i 20  # 每15秒采集一次 输出20次采集结果
+```
+
+### 结果说明
+
+一次采集的结果如下，输出的指标类型有read_latency(rlat)和write_latency(wlat)，指标的level有socket和channel两种级别。
+
+- SOCKET_LEVEL： socket层级的读写内存延迟，通过对channel级的指标求平均得到。
+- CHANNEL_LEVEL：channel级别的读写内存延迟
+  
+```bash
+[TIME-STAMP] 2023-07-10 07:06:17
+[SOCKET_LEVEL]
+               0       1
+    rlat   13.75   14.37
+    wlat   39.37   37.49
+[CHANNEL_LEVEL]-[SOCKET-0]
+               0       1       2       3       4       5       6       7       8       9      10      11
+    rlat   14.37   13.75    0.00   13.75   13.75    0.00   13.12   13.75    0.00   14.37   13.75    0.00
+    wlat   40.62   39.99    0.00   39.37   38.74    0.00   40.62   39.37    0.00   39.99   38.74    0.00
+[CHANNEL_LEVEL]-[SOCKET-1]
+               0       1       2       3       4       5       6       7       8       9      10      11
+    rlat   15.00   13.75    0.00   13.75   13.75    0.00   13.75   14.37    0.00   14.37   14.37    0.00
+    wlat   38.12   37.49    0.00   36.87   36.87    0.00   38.12   38.12    0.00   38.12   37.49    0.00
+```
+
+## 原理与限制
+
+基于IMC的PMU组件实现，需要硬件支持。目前仅支持Intel的Ice Lake（ICX）、Sky Lake（SKX）、Cascade Lake以及Sapphire Rapids(SPR)等微架构。
+
+| 微架构          | 代号 | cpu-model编号 |
+| --------------- | ---- | ------------- |
+| Sapphire Rapids | SPR  | 143           |
+| Ice Lake        | ICX  | 106/108       |
+| Cascade Lake    | CLX  | 85            |
+| Sky Lake-X      | SKX  | 85            |
+
+### 检查是否支持
+
+可以使用`lscpu`命令，通过CPU的`Model`字段查看微架构类型，检查硬件是否支持。
+
+```bash
+Architecture:        x86_64
+CPU op-mode(s):      32-bit, 64-bit
+Byte Order:          Little Endian
+CPU(s):              128
+On-line CPU(s) list: 0-127
+Thread(s) per core:  2
+Core(s) per socket:  32
+Socket(s):           2
+NUMA node(s):        2
+Vendor ID:           GenuineIntel
+BIOS Vendor ID:      Intel(R) Corporation
+CPU family:          6
+Model:               106
+Model name:          Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz
+BIOS Model name:     Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz
+Stepping:            6
+CPU MHz:             3500.000
+CPU max MHz:         3500.0000
+CPU min MHz:         800.0000
+BogoMIPS:            5800.00
+Virtualization:      VT-x
+L1d cache:           48K
+L1i cache:           32K
+L2 cache:            1280K
+L3 cache:            49152K
+NUMA node0 CPU(s):   0-31,64-95
+NUMA node1 CPU(s):   32-63,96-127
+```
diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6f9340c2bbf028deb1ab824a076a00fa55381c9
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/imc_latency.c
@@ -0,0 +1,906 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <strings.h>
+#include <signal.h>
+#include <unistd.h>
+#include <memory.h>
+#include <errno.h>
+#include <argp.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <linux/types.h>
+
+#include "imc_latency.h"
+
+// #define DEBUG
+
+const char* argp_program_version = "imc_latency 0.1";
+const char argp_program_doc[] =
+    "Detect the memory latency based on IMC PMU.\n"
+    "\n"
+
+    "USAGE: imc_latency [--help] [-d DELAY] [-i ITERATION] [-f LOGFILE]\n"
+    "\n"
+
+    "EXAMPLES:\n"
+    "    imc_latency            # run forever, display the memory latency.\n"
+    "    imc_latency -f foo.log   # log to foo.log.\n";
+
+/*
+ * Command-line options handled by parse_arg(): -d DELAY, -i ITERATION,
+ * -f LOGFILE and a hidden -h that prints the full help.
+ */
+static const struct argp_option opts[] = {
+    {"delay", 'd', "DELAY", 0, "Sample peroid, default is 3 seconds"},
+    {"iter", 'i', "ITERATION", 0, "Output times, default run forever"},
+    {"logfile", 'f', "LOGFILE", 0,
+     "Logfile for result, default /var/log/sysak/imc_latency/imc_latency.log"},
+    {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+    {},
+};
+
+/* Run-time configuration and the hardware facts discovered at start-up. */
+struct Env {
+    uint32_t max_cpuid;        /* eax of CPUID leaf 0 (set in detect_model) */
+    int32_t cpu_model;         /* CPUID leaf 1 model, normalised by is_model_support */
+    int32_t cpu_family;        /* CPUID leaf 1 family */
+    int32_t cpu_stepping;      /* CPUID leaf 1 stepping */
+    bool vm;                   /* true when CPUID leaf 1 ecx bit 31 is set */
+    int64_t nr_cpu;            /* sysconf(_SC_NPROCESSORS_CONF) */
+    int64_t nr_socket;         /* highest physical_package_id + 1 */
+    int64_t nr_core;           /* highest core_id + 1 */
+    int64_t nr_channel;        /* number of uncore_imc PMUs found in sysfs */
+    int64_t* socket_ref_core;  /* per socket: a CPU id used to open its perf events */
+    int64_t nr_iter;           /* remaining iterations; INT64_MAX unless -i is given */
+    int64_t delay;             /* sample period in seconds (see -d) */
+} env = {.vm = false, .nr_iter = INT64_MAX, .delay = DEFAUlT_PEROID};
+
+/* Counter snapshots filled by init_data()/read_imc(), and their timestamps. */
+record before, after;
+time_t before_ts = 0, after_ts = 0;
+/* One imc_pmu per socket, allocated by init_imc_pmus(). */
+imc_pmu* pmus = 0;
+/* Default log locations; log_path is overridden by -f. */
+char log_dir[FILE_PATH_LEN] = "/var/log/sysak/imc_latency";
+char default_log_path[FILE_PATH_LEN] =
+    "/var/log/sysak/imc_latency/imc_latency.log";
+char* log_path = 0;
+FILE* log_fp = 0;
+/*
+ * Set by sigint_handler() to request termination.
+ * NOTE(review): a flag written from a signal handler is normally declared
+ * "volatile sig_atomic_t"; confirm no other declaration depends on bool
+ * before changing the type.
+ */
+bool exiting = false;
+
+/* Signal handler: only raises the exiting flag; signo is not used. */
+static void sigint_handler(int signo) { exiting = 1; }
+
+/*
+ * parse_long - convert a decimal string to long
+ *
+ * @str: text to convert.
+ * @retval: receives the value on success; left untouched on failure.
+ *
+ * Returns 0 on success. Returns the strtol() errno value when the number
+ * is out of range, and -1 when an argument is NULL, when no digits are
+ * found, or when anything other than blanks follows the number ("12abc").
+ */
+static int parse_long(const char* str, long* retval) {
+    char* endptr = NULL;
+    long val;
+
+    if (!str || !retval) return -1;
+
+    errno = 0;
+    val = strtol(str, &endptr, 10);
+    if (errno != 0) {
+        /* save errno first: fprintf() is allowed to change it */
+        int err = errno;
+        fprintf(stderr, "Failed parse val.\n");
+        return err;
+    }
+
+    /* no digits at all */
+    if (endptr == str) return -1;
+
+    /* tolerate trailing blanks, reject trailing garbage */
+    while (*endptr == ' ' || *endptr == '\t' || *endptr == '\n') endptr++;
+    if (*endptr != '\0') return -1;
+
+    *retval = val;
+    return 0;
+}
+
+/*
+ * argp callback: stores -d in env.delay, -i (plus one) in env.nr_iter and
+ * -f in log_path; -h prints the full help. Positional arguments are
+ * ignored and any other key is reported as unknown to argp.
+ */
+static error_t parse_arg(int key, char* arg, struct argp_state* state) {
+    long parsed;
+
+    if (key == 'h') {
+        argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+        return 0;
+    }
+
+    if (key == 'd') {
+        if (parse_long(arg, &parsed) || parsed <= 0) {
+            fprintf(stderr, "Failed parse delay.\n");
+            argp_usage(state);
+        }
+        env.delay = parsed;
+        return 0;
+    }
+
+    if (key == 'i') {
+        if (parse_long(arg, &parsed) || parsed <= 0) {
+            fprintf(stderr, "Failed parse iteration-num.\n");
+            argp_usage(state);
+        }
+        /* one more than requested is stored */
+        env.nr_iter = parsed;
+        env.nr_iter++;
+        return 0;
+    }
+
+    if (key == 'f') {
+        log_path = arg;
+        return 0;
+    }
+
+    if (key == ARGP_KEY_ARG) return 0;
+
+    return ARGP_ERR_UNKNOWN;
+}
+
+/*
+ * Create @path with mode 0777 (one level only). An already existing path
+ * counts as success. Returns 0 on success, the mkdir() errno otherwise.
+ */
+static int prepare_directory(char* path) {
+    if (mkdir(path, 0777) >= 0) return 0;
+
+    return (errno == EEXIST) ? 0 : errno;
+}
+
+/*
+ * Open the log file for writing, truncating it. Falls back to
+ * default_log_path when no path was given. Returns NULL if fopen() fails.
+ */
+static FILE* open_logfile() {
+    if (log_path == NULL) log_path = default_log_path;
+
+    return fopen(log_path, "w");
+}
+
+/*
+ * read_sys_file - read one decimal integer from a sysfs-style file
+ *
+ * @path: file to read.
+ * @slient: when true, failures are not reported on stderr.
+ *
+ * Returns the value, or -1 when the file cannot be opened or does not
+ * start with a number (previously an uninitialized value was returned in
+ * the latter case).
+ */
+int64_t read_sys_file(char* path, bool slient) {
+    /* %ld needs a long*, so parse into a long and widen on return */
+    long parsed = -1;
+    FILE* fp = fopen(path, "r");
+    if (!fp) {
+        if (!slient) fprintf(stderr, "Failed open sys-file: %s\n", path);
+        return -1;
+    }
+
+    if (fscanf(fp, "%ld", &parsed) != 1) {
+        if (!slient) fprintf(stderr, "Failed parse sys-file: %s\n", path);
+        parsed = -1;
+    }
+#ifdef DEBUG
+    fprintf(stderr, "read from=%s val=%ld\n", path, parsed);
+#endif
+    fclose(fp);
+    return parsed;
+}
+
+/*
+ * write_reg - (re)open the perf event behind @ev with a new config value
+ *
+ * An already open descriptor is closed first. Fixed events always use
+ * config 0xff; other events use @val. The event is opened for any process
+ * (pid -1) on CPU ev->core_id.
+ *
+ * Returns 0 on success. On failure ev->fd is -1 and -1 is returned.
+ */
+static int write_reg(imc_event* ev, uint64_t val) {
+    if (ev->fd >= 0) {
+        close(ev->fd);
+        ev->fd = -1;
+    }
+
+    ev->attr.config = ev->fixed ? 0xff : val;
+
+    ev->fd = syscall(SYS_perf_event_open, &ev->attr, -1, ev->core_id, -1, 0);
+    /* only negative values signal failure; descriptor 0 is a valid fd */
+    if (ev->fd < 0) {
+        /* save errno: the fprintf() calls below may overwrite it */
+        int saved_errno = errno;
+
+        fprintf(stderr, "Linux Perf: Error on programming PMU %d:%s\n",
+                ev->pmu_id, strerror(saved_errno));
+        fprintf(stderr, "config: 0x%llx config1: 0x%llx config2: 0x%llx\n",
+                ev->attr.config, ev->attr.config1, ev->attr.config2);
+        if (saved_errno == EMFILE) fprintf(stderr, "%s", ULIMIT_RECOMMENDATION);
+
+        ev->fd = -1;
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Read the current 64-bit counter value of @ev. Returns 0 when the event
+ * is not open; a short or failed read is reported on stderr and whatever
+ * is in the result variable is returned.
+ */
+static uint64_t read_reg(imc_event* ev) {
+    uint64_t counter = 0;
+    int nread;
+
+    if (ev->fd < 0) return counter;
+
+    nread = read(ev->fd, &counter, sizeof(counter));
+    if (nread != sizeof(counter)) {
+        fprintf(stderr,
+                "PCM Error: failed to read from Linux perf handle %d PMU %d\n",
+                ev->fd, ev->pmu_id);
+    }
+
+    return counter;
+}
+
+/*
+ * is_cpu_online - tell whether logical CPU @cpu_id is online
+ *
+ * Reads /sys/devices/system/cpu/cpu<N>/online and returns true only when
+ * it contains 1 (previously any readable value, including 0, counted as
+ * online). A CPU that has no "online" attribute cannot be taken offline
+ * (typically cpu0 on x86), so it is reported online as long as its sysfs
+ * directory exists.
+ */
+static bool is_cpu_online(int cpu_id) {
+    char path[BUF_SIZE];
+
+    snprintf(path, BUF_SIZE, "/sys/devices/system/cpu/cpu%d/online", cpu_id);
+    if (access(path, F_OK) != 0) {
+        snprintf(path, BUF_SIZE, "/sys/devices/system/cpu/cpu%d", cpu_id);
+        return access(path, F_OK) == 0;
+    }
+
+    return read_sys_file(path, true) == 1;
+}
+
+/* Return topology/core_id of CPU @cpu_id from sysfs, or -1 on failure. */
+int64_t read_core_id(int cpu_id) {
+    char sysfs_path[BUF_SIZE];
+
+    snprintf(sysfs_path, BUF_SIZE,
+             "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu_id);
+
+    return read_sys_file(sysfs_path, true);
+}
+
+/*
+ * Return topology/physical_package_id (the socket) of CPU @cpu_id from
+ * sysfs, or -1 on failure.
+ */
+int64_t read_physical_package_id(int cpu_id) {
+    char sysfs_path[BUF_SIZE];
+
+    snprintf(sysfs_path, BUF_SIZE,
+             "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+             cpu_id);
+
+    return read_sys_file(sysfs_path, true);
+}
+
+/*
+ * Fill @ent with the core id and socket id of CPU @id.
+ * Returns 0 on success, -1 when either id could not be read.
+ */
+static int get_topology(int id, struct topology_ent* ent) {
+    ent->core_id = read_core_id(id);
+    ent->socket_id = read_physical_package_id(id);
+
+    if (ent->core_id != -1 && ent->socket_id != -1) return 0;
+
+#ifdef DEBUG
+    fprintf(stderr, "get coreid=%d socket_id=%d\n", ent->core_id,
+            ent->socket_id);
+#endif
+    return -1;
+}
+
+/*
+ * discovery_topology - discover the CPU/socket layout of the machine
+ *
+ * Sets env.nr_cpu, env.nr_socket and env.nr_core, and allocates
+ * env.socket_ref_core, which holds for every socket the id of its
+ * highest-numbered online CPU. An entry stays 0 when no CPU of that
+ * socket is online.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int discovery_topology() {
+    int err = 0, i = 0;
+    struct topology_ent* topo = NULL;
+    int64_t max_skt_id = 0;
+    int64_t max_core_id = 0;
+
+    env.nr_cpu = sysconf(_SC_NPROCESSORS_CONF);
+
+    /* zero CPUs is as unusable as an error */
+    if (env.nr_cpu <= 0) {
+        fprintf(stderr, "Failed get nr_cpu.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    topo = calloc(env.nr_cpu, sizeof(struct topology_ent));
+    if (!topo) {
+        fprintf(stderr, "Faile calloc topology memory.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    for (i = 0; i < env.nr_cpu; i++) {
+        err = get_topology(i, topo + i);
+        if (err) {
+            fprintf(stderr, "Failed get topology cpuid:%d\n", i);
+            goto cleanup;
+        }
+
+        if (topo[i].socket_id > max_skt_id) max_skt_id = topo[i].socket_id;
+        if (topo[i].core_id > max_core_id) max_core_id = topo[i].core_id;
+    }
+
+    env.nr_socket = max_skt_id + 1;
+    env.nr_core = max_core_id + 1;
+
+    env.socket_ref_core = calloc(env.nr_socket, sizeof(int64_t));
+    if (!env.socket_ref_core) {
+        /* nr_socket is 64-bit: print it with a matching conversion */
+        fprintf(stderr, "Failed calloc socket_ref_core. nr_socket=%ld\n",
+                (long)env.nr_socket);
+        err = -1;
+        goto cleanup;
+    }
+
+    for (i = 0; i < env.nr_cpu; i++) {
+        if (!is_cpu_online(i)) continue;
+        env.socket_ref_core[topo[i].socket_id] = i;
+    }
+
+cleanup:
+    free(topo);
+    return err;
+}
+
+/*
+ * Execute the CPUID instruction with EAX = @leaf and store the resulting
+ * eax/ebx/ecx/edx in @info. ECX is not set on input.
+ */
+static void cpuid_1(int leaf, CPUID_INFO* info) {
+    __asm__ __volatile__("cpuid"
+                         : "=a"(info->reg.eax), "=b"(info->reg.ebx),
+                           "=c"(info->reg.ecx), "=d"(info->reg.edx)
+                         : "a"(leaf));
+}
+
+/*
+ * Execute the CPUID instruction with EAX = @leaf and ECX = @subleaf and
+ * store the resulting eax/ebx/ecx/edx in @info.
+ */
+void cpuid_2(const unsigned leaf, const unsigned subleaf, CPUID_INFO* info) {
+    __asm__ __volatile__("cpuid"
+                         : "=a"(info->reg.eax), "=b"(info->reg.ebx),
+                           "=c"(info->reg.ecx), "=d"(info->reg.edx)
+                         : "a"(leaf), "c"(subleaf));
+}
+
+/*
+ * detect_model - identify the CPU through CPUID
+ *
+ * Checks that the vendor string is "GenuineIntel", then fills
+ * env.max_cpuid, env.cpu_family, env.cpu_model, env.cpu_stepping and
+ * env.vm. Returns false for a non-Intel CPU or a family other than 6.
+ */
+static bool detect_model() {
+    char buffer[1024]; /* zeroed below but otherwise unused */
+    union {
+        char cbuf[16];
+        int ibuf[16 / sizeof(int)];
+    } buf;
+
+    CPUID_INFO cpuinfo;
+
+    bzero(buffer, 1024);
+    bzero(buf.cbuf, 16);
+    cpuid_1(0, &cpuinfo);
+
+    /*
+     * Assemble the 12-byte vendor string from array[1], array[3], array[2]
+     * (presumably ebx, edx, ecx - CPUID_INFO layout is declared elsewhere).
+     */
+    buf.ibuf[0] = cpuinfo.array[1];
+    buf.ibuf[1] = cpuinfo.array[3];
+    buf.ibuf[2] = cpuinfo.array[2];
+
+    if (strncmp(buf.cbuf, "GenuineIntel", 4 * 3) != 0) {
+        fprintf(stderr, "Not intel cpu.\n");
+        return false;
+    }
+
+    env.max_cpuid = cpuinfo.array[0];
+
+    cpuid_1(1, &cpuinfo);
+    /* bits 11:8 combined with bits 23:20 shifted down to bits 7:4 */
+    env.cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) |
+                     ((cpuinfo.array[0] & 0xf00000) >> 16);
+    /* bits 7:4 as low nibble, bits 19:16 as high nibble */
+    env.cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) |
+                    ((cpuinfo.array[0] & 0xf0000) >> 12);
+    env.cpu_stepping = cpuinfo.array[0] & 0x0f;
+
+    /* ecx bit 31 of leaf 1 is treated as "running under a hypervisor" */
+    if (cpuinfo.reg.ecx & (1UL << 31UL)) {
+        env.vm = true;
+        fprintf(stderr,
+                "WARN: Detected a hypervisor/virtualization technology. Some "
+                "metrics might not be available due to configuration or "
+                "availability of virtual hardware features.\n");
+    }
+
+    if (env.cpu_family != 6) {
+        fprintf(stderr, "Unsupport CPU Family: %d\n", env.cpu_family);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Fold model variants onto their base model (the result is written back
+ * to env.cpu_model) and report whether the base model is one of ICX, SPR
+ * or SKX.
+ */
+bool is_model_support() {
+    int32_t model = env.cpu_model;
+
+    switch (model) {
+        case NEHALEM:
+            model = NEHALEM_EP;
+            break;
+        case ATOM_2:
+            model = ATOM;
+            break;
+        case HASWELL_ULT:
+        case HASWELL_2:
+            model = HASWELL;
+            break;
+        case BROADWELL_XEON_E3:
+            model = BROADWELL;
+            break;
+        case ICX_D:
+            model = ICX;
+            break;
+        case CML_1:
+            model = CML;
+            break;
+        case ICL_1:
+            model = ICL;
+            break;
+        case TGL_1:
+            model = TGL;
+            break;
+        case ADL_1:
+            model = ADL;
+            break;
+        case RPL_1:
+        case RPL_2:
+        case RPL_3:
+            model = RPL;
+            break;
+        default:
+            /* already a base model, or an unknown one */
+            break;
+    }
+
+    env.cpu_model = model;
+
+    if (model == ICX) return true;
+    if (model == SPR) return true;
+    return model == SKX;
+}
+
+/*
+ * Build the four IMC event encodings used for the latency metric, in the
+ * order: RPQ occupancy, RPQ inserts, WPQ occupancy, WPQ inserts.
+ *
+ * ICX and SPR use umask 1 for the insert events and event 0x82 for WPQ
+ * occupancy; every other model uses umask 0 and event 0x81.
+ *
+ * Returns a calloc()ed array of 4 entries that the caller must free, or
+ * NULL when the allocation fails.
+ */
+uint32_t* get_ddr_latency_metric_config() {
+    uint32_t* cfgs = 0;
+    cfgs = calloc(4, sizeof(uint32_t));
+    if (!cfgs) {
+        fprintf(stderr, "Failed calloc cfgs memory.\n");
+        return NULL;
+    }
+
+    if (ICX == env.cpu_model || SPR == env.cpu_model) {
+        cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ occupancy pch 0
+        cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) +
+                  MC_CH_PCI_PMON_CTL_UMASK(1);  // DRAM RPQ Insert.pch 0
+        cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x82) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Occupancy pch 0
+        cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) +
+                  MC_CH_PCI_PMON_CTL_UMASK(1);  // DRAM WPQ Insert.pch 0
+    } else {
+        cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ occupancy
+        cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ Insert
+        cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Occupancy
+        cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Insert
+    }
+
+    return cfgs;
+}
+
+/*
+ * Return a zeroed perf_event_attr with size filled in. type and config
+ * are set to -1 as placeholders the caller must overwrite. read_format is
+ * PERF_FORMAT_GROUP when @group is true and 0 otherwise.
+ */
+struct perf_event_attr init_perf_event_attr(bool group) {
+    struct perf_event_attr attr;
+
+    bzero(&attr, sizeof(struct perf_event_attr));
+    attr.size = sizeof(attr);
+    attr.type = -1;    // must be set up later
+    attr.config = -1;  // must be set up later
+
+    /* read_format is already 0 after bzero() */
+    if (group) attr.read_format = PERF_FORMAT_GROUP;
+
+    return attr;
+}
+
+/*
+ * Reset @event to a closed (fd == -1), non-group perf event bound to PMU
+ * type @pmu_id and CPU @core_id; @fixed marks the fixed counter.
+ */
+void init_imc_event(imc_event* event, int pmu_id, int core_id, bool fixed) {
+    event->attr = init_perf_event_attr(false);
+    event->attr.type = pmu_id;
+
+    event->fd = -1;
+    event->pmu_id = pmu_id;
+    event->core_id = core_id;
+    event->fixed = fixed;
+}
+
+/*
+ * Initialise the fixed event and the GENERAL_REG_NUM general events of
+ * @grp for PMU @pmu_id; all of them are bound to the reference CPU of
+ * socket @socket_id.
+ */
+void init_imc_reggrp(imc_reg_group* grp, int socket_id, int pmu_id) {
+    int reg;
+
+#ifdef DEBUG
+
+    fprintf(stderr, "Init imc reg group: socketid=%d pmuid=%d\n", socket_id,
+            pmu_id);
+#endif
+    init_imc_event(&grp->fixed_ev, pmu_id, env.socket_ref_core[socket_id],
+                   true);
+
+    for (reg = 0; reg < GENERAL_REG_NUM; ++reg) {
+        imc_event* ev = &grp->general_ev[reg];
+
+        init_imc_event(ev, pmu_id, env.socket_ref_core[socket_id], false);
+    }
+}
+
+/*
+ * init_imc_pmus - allocate one imc_pmu per socket
+ *
+ * @pmu_ids: perf PMU type ids of the IMC units.
+ * @size: number of entries in @pmu_ids.
+ *
+ * Every socket gets @size register groups, one per PMU id. Returns the
+ * array of env.nr_socket entries, or NULL when an allocation fails (in
+ * which case nothing stays allocated).
+ */
+imc_pmu* init_imc_pmus(int64_t* pmu_ids, int64_t size) {
+    int skt_id = 0;
+    int pmu_id = 0;
+
+    imc_pmu* pmus = calloc(env.nr_socket, sizeof(imc_pmu));
+    if (!pmus) {
+        fprintf(stderr, "Failed calloc imc pmus memory.\n");
+        return NULL;
+    }
+
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        pmus[skt_id].reg_groups = calloc(size, sizeof(imc_reg_group));
+        if (!pmus[skt_id].reg_groups) {
+            fprintf(stderr, "Failed calloc imc reg groups memory.\n");
+            /* unwind the sockets that were already set up */
+            while (skt_id-- > 0) free(pmus[skt_id].reg_groups);
+            free(pmus);
+            return NULL;
+        }
+        pmus[skt_id].socket_id = skt_id;
+        pmus[skt_id].nr_grp = size;
+
+        for (pmu_id = 0; pmu_id < size; pmu_id++) {
+            init_imc_reggrp(&pmus[skt_id].reg_groups[pmu_id], skt_id,
+                            pmu_ids[pmu_id]);
+        }
+    }
+
+    return pmus;
+}
+
+/*
+ * program_imc - program every register group of every socket
+ *
+ * @cfgs: GENERAL_REG_NUM event encodings, one per general counter.
+ * @pmus: per-socket PMU descriptors from init_imc_pmus().
+ *
+ * Each write_reg() call closes and reopens the perf event, so within a
+ * pair of calls only the second one is left open. write_reg() failures
+ * are not checked here.
+ */
+void program_imc(uint32_t* cfgs, imc_pmu* pmus) {
+    int skt_id = 0;
+    int pmu_id = 0;
+    int idx = 0;
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        imc_pmu* pmu = pmus + skt_id;
+        for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) {
+            imc_reg_group* grp = pmu->reg_groups + pmu_id;
+            /* enable and reset fixed counter(DRAM clock) */
+            write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN);
+            write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN +
+                                          MC_CH_PCI_PMON_FIXED_CTL_RST);
+            for (idx = 0; idx < GENERAL_REG_NUM; idx++) {
+                uint64_t event = cfgs[idx];
+                if (SPR == env.cpu_model) {
+                    /* SPR: the raw event encoding is written as is */
+                    write_reg(&grp->general_ev[idx], event);
+                } else {
+                    /* other models: enable first, then enable | event */
+                    write_reg(&grp->general_ev[idx], MC_CH_PCI_PMON_CTL_EN);
+                    write_reg(&grp->general_ev[idx],
+                              MC_CH_PCI_PMON_CTL_EN | event);
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Allocate env.nr_socket zeroed socket records, each with env.nr_channel
+ * zeroed channel records. Returns NULL when an allocation fails (nothing
+ * stays allocated in that case).
+ */
+socket_record* alloc_socket_record() {
+    int skt_id = 0;
+    socket_record* rec = calloc(env.nr_socket, sizeof(socket_record));
+
+    if (!rec) {
+        fprintf(stderr, "Failed calloc socket record memory.\n");
+        return NULL;
+    }
+
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        rec[skt_id].channel_record_arr =
+            calloc(env.nr_channel, sizeof(channel_record));
+        if (!rec[skt_id].channel_record_arr) {
+            fprintf(stderr, "Failed calloc channel record memory.\n");
+            /* unwind the sockets that were already set up */
+            while (skt_id-- > 0) free(rec[skt_id].channel_record_arr);
+            free(rec);
+            return NULL;
+        }
+    }
+    return rec;
+}
+
+/*
+ * Free an array obtained from alloc_socket_record(), including the
+ * channel records of every socket. A NULL @rec is ignored.
+ */
+void free_socket_record(socket_record* rec) {
+    int skt_id = 0;
+
+    if (!rec) return;
+
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        free(rec[skt_id].channel_record_arr);
+    }
+    free(rec);
+}
+
+/*
+ * Allocate the "before" and "after" snapshot buffers. The results of
+ * alloc_socket_record() are stored without being checked.
+ */
+void init_data() {
+    before.socket_record_arr = alloc_socket_record();
+    after.socket_record_arr = alloc_socket_record();
+}
+
+/*
+ * Release the "before" and "after" snapshot buffers. The pointers are
+ * cleared afterwards, so a second call is a no-op instead of a double
+ * free.
+ */
+void free_data() {
+    if (before.socket_record_arr) {
+        free_socket_record(before.socket_record_arr);
+        before.socket_record_arr = NULL;
+    }
+    if (after.socket_record_arr) {
+        free_socket_record(after.socket_record_arr);
+        after.socket_record_arr = NULL;
+    }
+}
+
+/*
+ * Return the perf PMU type id read from sysfs for "uncore_imc_<num>", or
+ * for plain "uncore_imc" when @num is -1. Returns -1 when the file cannot
+ * be read.
+ */
+int64_t get_perf_pmuid(int num) {
+    char type_path[BUF_SIZE];
+
+    if (num == -1) {
+        snprintf(type_path, BUF_SIZE,
+                 "/sys/bus/event_source/devices/uncore_imc/type");
+    } else {
+        snprintf(type_path, BUF_SIZE,
+                 "/sys/bus/event_source/devices/uncore_imc_%d/type", num);
+    }
+
+    return read_sys_file(type_path, true);
+}
+
+/*
+ * enumerate_imc_PMUs - collect the perf PMU type ids of all IMC units
+ *
+ * Probes "uncore_imc" (index -1) and "uncore_imc_0" up to
+ * "uncore_imc_<MAX_IMC_ID>", i.e. MAX_IMC_ID + 2 candidates, and stores
+ * the number found in env.nr_channel.
+ *
+ * Returns a calloc()ed array whose first env.nr_channel entries are
+ * valid (the caller frees it), or NULL when the allocation fails or no
+ * PMU is found.
+ */
+static int64_t* enumerate_imc_PMUs() {
+    int64_t* pmu_ids = 0;
+    int idx = 0, i = 0;
+
+    /* one slot per candidate probed below, so idx cannot run past the end */
+    pmu_ids = calloc(MAX_IMC_ID + 2, sizeof(int64_t));
+
+    if (!pmu_ids) {
+        fprintf(stderr, "Failed calloc pmu ids memory.\n");
+        return NULL;
+    }
+
+    for (i = -1; i <= MAX_IMC_ID; ++i) {
+        int64_t pmu_id = get_perf_pmuid(i);
+        if (pmu_id != -1) pmu_ids[idx++] = pmu_id;
+    }
+
+    env.nr_channel = idx;
+
+    if (env.nr_channel == 0) {
+        free(pmu_ids);
+        pmu_ids = 0;
+    }
+
+    return pmu_ids;
+}
+
+/*
+ * init_env - one-time set-up of the measurement environment
+ *
+ * Steps: check the CPU model, discover the topology, enumerate the IMC
+ * PMUs, build the event configuration, allocate and program the PMUs,
+ * then allocate the snapshot buffers.
+ *
+ * Returns 0 on success and -1 on the first failing step. The temporary
+ * pmu id and config arrays are released on every path.
+ */
+static int init_env() {
+    int err = 0;
+    int64_t* pmu_ids = 0;
+    uint32_t* cfgs = 0;
+
+    // check model
+    if (!detect_model()) {
+        fprintf(stderr, "Failed detect model.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    if (!is_model_support()) {
+        fprintf(stderr, "Unsupport model.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // get core/socket info
+    err = discovery_topology();
+    if (err) {
+        fprintf(stderr, "Failed discovery topology.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // get all imc-pmu id
+    pmu_ids = enumerate_imc_PMUs();
+    if (!pmu_ids) {
+        fprintf(stderr, "Failed enumerate imc pmus.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    cfgs = get_ddr_latency_metric_config();
+    if (!cfgs) {
+        fprintf(stderr, "Failed get ddr latency metric config.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // init pmu
+    pmus = init_imc_pmus(pmu_ids, env.nr_channel);
+    if (!pmus) {
+        /* program_imc() would dereference a NULL array */
+        fprintf(stderr, "Failed init imc pmus.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // write pmu register
+    program_imc(cfgs, pmus);
+
+    // init data
+    init_data();
+#ifdef DEBUG
+    /* the env counters are 64-bit: cast to match the conversions */
+    fprintf(stderr, "nr_socket=%ld nr_core=%ld nr_cpu=%ld nr_channel=%ld \n",
+            (long)env.nr_socket, (long)env.nr_core, (long)env.nr_cpu,
+            (long)env.nr_channel);
+    int i = 0;
+    for (i = 0; i < env.nr_socket; i++) {
+        fprintf(stderr, "socket%d-ref cpu=%ld\n", i,
+                (long)env.socket_ref_core[i]);
+    }
+#endif
+
+cleanup:
+
+    free(pmu_ids);
+    free(cfgs);
+
+    return err;
+}
+
+/*
+ * Latency of one queue over the last interval: occupancy delta divided by
+ * insert delta, scaled by dram_speed. The counters are uint64_t, so they are
+ * converted to double first; dividing them directly would truncate. *out is
+ * left untouched when no new inserts were counted or dram_speed is not
+ * positive, so nothing is ever divided by zero.
+ */
+static void imc_queue_latency(uint64_t occ_before, uint64_t occ_after,
+                              uint64_t ins_before, uint64_t ins_after,
+                              double dram_speed, double* out) {
+    if (ins_after <= ins_before || !(dram_speed > 0)) return;
+    *out = (double)(occ_after - occ_before) /
+           (double)(ins_after - ins_before) / dram_speed;
+}
+
+/*
+ * Read every IMC counter into the `after` records and accumulate per-socket
+ * totals; once a previous sample exists (before_ts != 0) also compute the
+ * per-channel and per-socket read/write latencies against `before`.
+ * A latency is only updated when its insert counter advanced.
+ * The += accumulation assumes alloc_socket_record() hands out zeroed
+ * records -- TODO confirm.
+ */
+void read_imc() {
+    int skt_id = 0, pmu_id = 0;
+    after_ts = time(0);
+
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        imc_pmu* pmu = pmus + skt_id;
+        socket_record* socket_ev = &after.socket_record_arr[skt_id];
+        for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) {
+            imc_reg_group* grp = pmu->reg_groups + pmu_id;
+            channel_record* channel_ev =
+                &socket_ev->channel_record_arr[pmu_id];
+            /* the DRAM clock comes from the first group's fixed counter */
+            if (pmu_id == 0) {
+                socket_ev->dram_clock = read_reg(&grp->fixed_ev);
+                if (env.cpu_model == ICX || env.cpu_model == SNOWRIDGE) {
+                    socket_ev->dram_clock = 2 * socket_ev->dram_clock;
+                }
+            }
+
+            channel_ev->rpq_occ = read_reg(&grp->general_ev[RPQ_OCC]);
+            channel_ev->rpq_ins = read_reg(&grp->general_ev[RPQ_INS]);
+            channel_ev->wpq_occ = read_reg(&grp->general_ev[WPQ_OCC]);
+            channel_ev->wpq_ins = read_reg(&grp->general_ev[WPQ_INS]);
+
+            /* per-socket totals are the sum over the socket's channels */
+            socket_ev->rpq_occ += channel_ev->rpq_occ;
+            socket_ev->rpq_ins += channel_ev->rpq_ins;
+            socket_ev->wpq_occ += channel_ev->wpq_occ;
+            socket_ev->wpq_ins += channel_ev->wpq_ins;
+        }
+    }
+
+    if (!before_ts) return; /* first sample: nothing to compare against */
+
+    double delta = after_ts - before_ts;
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        socket_record* prev = &before.socket_record_arr[skt_id];
+        socket_record* cur = &after.socket_record_arr[skt_id];
+        imc_pmu* pmu = pmus + skt_id;
+
+        /* DRAM clock ticks per nanosecond over the interval (delta is seconds) */
+        double dram_speed =
+            (cur->dram_clock - prev->dram_clock) / (delta * (double)1e9);
+
+        for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) {
+            channel_record* pc = &prev->channel_record_arr[pmu_id];
+            channel_record* cc = &cur->channel_record_arr[pmu_id];
+
+            imc_queue_latency(pc->rpq_occ, cc->rpq_occ, pc->rpq_ins,
+                              cc->rpq_ins, dram_speed, &cc->read_latency);
+            imc_queue_latency(pc->wpq_occ, cc->wpq_occ, pc->wpq_ins,
+                              cc->wpq_ins, dram_speed, &cc->write_latency);
+        }
+
+        /* same computation on the per-socket totals */
+        imc_queue_latency(prev->rpq_occ, cur->rpq_occ, prev->rpq_ins,
+                          cur->rpq_ins, dram_speed, &cur->read_latency);
+        imc_queue_latency(prev->wpq_occ, cur->wpq_occ, prev->wpq_ins,
+                          cur->wpq_ins, dram_speed, &cur->write_latency);
+    }
+}
+
+/* Format ts as UTC "YYYY-MM-DD HH:MM:SS" into buf (size bytes); returns buf. */
+static char* ts2str(time_t ts, char* buf, int size) {
+    strftime(buf, size, "%Y-%m-%d %H:%M:%S", gmtime(&ts));
+    return buf;
+}
+
+/* Write a "[TIME-STAMP] <time>" line for the current time to dest. */
+static void output_ts(FILE* dest) {
+    char stime_str[BUF_SIZE] = {0};
+    fprintf(dest, "[TIME-STAMP] %s\n", ts2str(time(0), stime_str, BUF_SIZE));
+}
+
+/* Print the [SOCKET_LEVEL] table: a header row of socket ids followed by the
+ * read ("rlat") and write ("wlat") latency of every socket in `after`. */
+static void output_socket_lat(FILE* dest) {
+    int32_t skt;
+
+    fprintf(dest, "%s\n", "[SOCKET_LEVEL]");
+
+    /* header row: empty label column, then one column per socket */
+    fprintf(dest, "%8s", "");
+    for (skt = 0; skt < env.nr_socket; skt++) {
+        fprintf(dest, "%8d", skt);
+    }
+    fprintf(dest, "\n");
+
+    fprintf(dest, "%8s", "rlat");
+    for (skt = 0; skt < env.nr_socket; skt++) {
+        fprintf(dest, "%8.2lf", after.socket_record_arr[skt].read_latency);
+    }
+    fprintf(dest, "\n");
+
+    fprintf(dest, "%8s", "wlat");
+    for (skt = 0; skt < env.nr_socket; skt++) {
+        fprintf(dest, "%8.2lf", after.socket_record_arr[skt].write_latency);
+    }
+    fprintf(dest, "\n");
+}
+
+/* Print one [CHANNEL_LEVEL] table per socket: a header row of channel ids
+ * followed by the read ("rlat") and write ("wlat") latency of each of the
+ * env.nr_channel channels recorded in `after`. */
+static void output_channel_lat(FILE* dest) {
+    int32_t socket_id = 0, channel_id = 0;
+    for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
+        socket_record* srec = &after.socket_record_arr[socket_id];
+
+        fprintf(dest, "[CHANNEL_LEVEL]-[SOCKET-%d]\n", socket_id);
+        fprintf(dest, "%8s", "");
+        for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
+            fprintf(dest, "%8d", channel_id);
+        }
+        fprintf(dest, "\n");
+
+        fprintf(dest, "%8s", "rlat");
+        for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
+            channel_record* crec = &srec->channel_record_arr[channel_id];
+            fprintf(dest, "%8.2lf", crec->read_latency);
+        }
+        fprintf(dest, "\n");
+
+        fprintf(dest, "%8s", "wlat");
+        for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
+            channel_record* crec = &srec->channel_record_arr[channel_id];
+            fprintf(dest, "%8.2lf", crec->write_latency);
+        }
+        fprintf(dest, "\n");
+    }
+}
+
+/* Rotate snapshots: the just-read `after` records become `before`, the old
+ * `before` records are released, and `after` gets freshly allocated records. */
+void swap_record() {
+    socket_record* stale = before.socket_record_arr;
+
+    /* keep the newest sample as the baseline of the next interval */
+    before.socket_record_arr = after.socket_record_arr;
+    before_ts = after_ts;
+
+    /* drop the old baseline and start the next sample from new records */
+    free_socket_record(stale);
+    after.socket_record_arr = alloc_socket_record();
+}
+
+static void output_split(FILE* dest) { fprintf(dest, "\n"); } /* blank separator line */
+/* Take one IMC sample; once a previous sample exists, log the latencies. */
+static void collect_data() {
+    read_imc();
+
+    if (before_ts) {
+        output_ts(log_fp);
+        output_socket_lat(log_fp);
+        output_channel_lat(log_fp);
+        output_split(log_fp);
+        fflush(log_fp);
+    }
+
+    swap_record();
+}
+
+static void clean_env(void) { free_data(); } /* frees the before/after records only */
+
+int main(int argc, char** argv) { /* parse args, open the log, init, then sample in a loop */
+    int err;
+    /* parse args */
+    static const struct argp argp = {
+        .options = opts,
+        .parser = parse_arg,
+        .doc = argp_program_doc,
+    };
+
+    err = argp_parse(&argp, argc, argv, 0, 0, 0);
+    if (err) {
+        fprintf(stderr, "Failed parse args.\n");
+        return -1;
+    }
+
+    prepare_directory(log_dir);
+    log_fp = open_logfile(); /* every sample is written to this stream */
+    if (!log_fp) {
+        fprintf(stderr, "Failed open log file.\n");
+        return -1;
+    }
+
+    if (signal(SIGINT, sigint_handler) == SIG_ERR) {
+        fprintf(stderr, "Failed set signal handler.\n");
+        return -errno;
+    }
+
+    err = init_env();
+    if (err) {
+        fprintf(stderr, "Init env error.\n");
+        return -1;
+    }
+    /* one sample per iteration until nr_iter runs out or `exiting` is set */
+    while (env.nr_iter-- && !exiting) {
+        collect_data();
+        sleep(env.delay); /* also sleeps once after the final sample */
+    }
+
+    clean_env();
+}
diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h
new file mode 100644
index 0000000000000000000000000000000000000000..d2b6387ebaa8c50a07487878519b62d9856c6159
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/imc_latency.h
@@ -0,0 +1,156 @@
+#ifndef IMC_LATENCY_H
+#define IMC_LATENCY_H
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include <linux/perf_event.h>
+#include <stdint.h>
+
+#define ULIMIT_RECOMMENDATION                                                 \
+    ("try executing 'ulimit -n 1000000' to increase the limit on the number " \
+     "of open files.\n")
+
+typedef union CPUID_INFO { /* four 32-bit values, addressable as an array or by register name */
+    int array[4];
+    struct {
+        unsigned int eax, ebx, ecx, edx;
+    } reg;
+} CPUID_INFO;
+
+enum INTEL_CPU_MODEL { /* CPU model ids; read_imc() compares env.cpu_model against ICX and SNOWRIDGE */
+    NEHALEM_EP = 26,
+    NEHALEM = 30,
+    ATOM = 28,
+    ATOM_2 = 53,
+    CENTERTON = 54,
+    BAYTRAIL = 55,
+    AVOTON = 77,
+    CHERRYTRAIL = 76,
+    APOLLO_LAKE = 92,
+    GEMINI_LAKE = 122,
+    DENVERTON = 95,
+    SNOWRIDGE = 134,
+    CLARKDALE = 37,
+    WESTMERE_EP = 44,
+    NEHALEM_EX = 46,
+    WESTMERE_EX = 47,
+    SANDY_BRIDGE = 42,
+    JAKETOWN = 45,
+    IVY_BRIDGE = 58,
+    HASWELL = 60,
+    HASWELL_ULT = 69,
+    HASWELL_2 = 70,
+    IVYTOWN = 62,
+    HASWELLX = 63,
+    BROADWELL = 61,
+    BROADWELL_XEON_E3 = 71,
+    BDX_DE = 86,
+    SKL_UY = 78,
+    KBL = 158,
+    KBL_1 = 142,
+    CML = 166,
+    CML_1 = 165,
+    ICL = 126,
+    ICL_1 = 125,
+    RKL = 167,
+    TGL = 140,
+    TGL_1 = 141,
+    ADL = 151,
+    ADL_1 = 154,
+    RPL = 0xb7,
+    RPL_1 = 0xba,
+    RPL_2 = 0xbf,
+    RPL_3 = 0xbe,
+    BDX = 79,
+    KNL = 87,
+    SKL = 94,
+    SKX = 85,
+    ICX_D = 108,
+    ICX = 106,
+    SPR = 143,
+    END_OF_MODEL_LIST = 0x0ffff /* presumably a list terminator -- TODO confirm */
+};
+
+#define MC_CH_PCI_PMON_CTL_EVENT(x) ((x) << 0) /* (x) so that e.g. a | b is shifted as a whole */
+#define MC_CH_PCI_PMON_CTL_UMASK(x) ((x) << 8)
+#define MC_CH_PCI_PMON_CTL_RST (1 << 17)
+#define MC_CH_PCI_PMON_CTL_EDGE_DET (1 << 18)
+#define MC_CH_PCI_PMON_CTL_EN (1 << 22)
+#define MC_CH_PCI_PMON_CTL_INVERT (1 << 23)
+#define MC_CH_PCI_PMON_CTL_THRESH(x) ((x) << 24UL)
+#define MC_CH_PCI_PMON_FIXED_CTL_RST (1 << 19)
+#define MC_CH_PCI_PMON_FIXED_CTL_EN (1 << 22)
+#define UNC_PMON_UNIT_CTL_FRZ_EN (1 << 16)
+#define UNC_PMON_UNIT_CTL_RSV ((1 << 16) + (1 << 17))
+
+#define RPQ_OCC 0 /* RPQ_* and WPQ_* are indices into imc_reg_group.general_ev[] */
+#define RPQ_INS 1
+#define WPQ_OCC 2
+#define WPQ_INS 3
+
+#define BUF_SIZE 1024
+#define MAX_IMC_ID 100 /* highest uncore_imc_<N> probed; also the size of the pmu id array */
+#define GENERAL_REG_NUM 4 /* length of imc_reg_group.general_ev[] */
+#define FIXED_REG_NUM 1
+#define FILE_PATH_LEN 256
+#define DEFAUlT_PEROID 3
+
+typedef struct imc_event_t { /* one counter; read_imc() passes these to read_reg() */
+    struct perf_event_attr attr;
+    int fd; /* presumably the perf event file descriptor -- TODO confirm */
+    int core_id;
+    int pmu_id;
+    bool fixed; /* presumably set for the fixed counter -- TODO confirm */
+} imc_event;
+
+typedef struct imc_reg_group_t { /* the counters of one group of an imc_pmu */
+    imc_event general_ev[GENERAL_REG_NUM]; /* indexed by RPQ_OCC .. WPQ_INS */
+    imc_event fixed_ev; /* read_imc() reads this as the DRAM clock */
+    int pmu_id;
+} imc_reg_group;
+
+typedef struct imc_pmu_t { /* read_imc() indexes the pmus array by socket id */
+    imc_reg_group* reg_groups; /* array of nr_grp groups */
+    int socket_id;
+    int nr_grp; /* number of entries in reg_groups */
+} imc_pmu;
+
+struct topology_ent { /* presumably one cpu's core and socket ids -- TODO confirm against discovery_topology() */
+    int64_t cpu_id;
+    int64_t core_id;
+    int64_t socket_id;
+};
+
+typedef struct event { /* NOTE(review): not referenced by read_imc(); confirm it is still needed */
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    uint64_t dram_speed;
+} event;
+
+typedef struct channel_record { /* one channel's sample, filled by read_imc() */
+    uint64_t rpq_occ; /* raw values read from general_ev[RPQ_OCC .. WPQ_INS] */
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    double read_latency; /* derived from the rpq deltas between two samples */
+    double write_latency; /* derived from the wpq deltas between two samples */
+} channel_record;
+
+typedef struct socket_record { /* one socket's sample, filled by read_imc() */
+    channel_record* channel_record_arr; /* per-channel samples of this socket */
+    uint64_t rpq_occ; /* the four counters are sums over the socket's channels */
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    double read_latency;
+    double write_latency;
+    uint64_t dram_clock; /* fixed counter of the first group (doubled on ICX and SNOWRIDGE) */
+} socket_record;
+
+typedef struct record { /* one snapshot (`before` or `after`) */
+    socket_record* socket_record_arr; /* indexed by socket id */
+} record;
+
+#endif
diff --git a/source/tools/detect/mem/memcgoffline/Makefile b/source/tools/detect/mem/memcgoffline/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..de3835b51a9f458b13b6ae9d280a8d44b474eb3f
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/Makefile
@@ -0,0 +1,7 @@
+target := memcgoffline
+LIBS += -L ${OBJ_LIB_PATH}/lib -l:libcoolbpf.a -l:libsysak.a -lelf -lz
+INCLUDES += -I$(SRC)/tools/detect/mem/memcgoffline/include
+LDFLAGS += -Wall $(LIBS)
+mods := memcg_iter.o memcgoffline.o
+
+include $(SRC)/mk/csrc.mk
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/include/btfparse.h b/source/tools/detect/mem/memcgoffline/include/btfparse.h
new file mode 100644
index 0000000000000000000000000000000000000000..84204f540b000444299df0947cec670c0d896889
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/include/btfparse.h
@@ -0,0 +1,39 @@
+
+
+#ifndef __BTF_PARSE_H
+#define __BTF_PARSE_H
+
+
+
+/**
+ * btf_load: load btf from btf_custom_path
+ * 
+ * @btf_custom_path: path of btf file
+ */
+struct btf *btf_load(char *btf_custom_path);
+typedef unsigned int uint32_t;
+
+struct member_attribute
+{
+    uint32_t size;      // size of structure's member
+    uint32_t real_size; // NOTE(review): meaning is not stated here -- confirm against btf_find_struct_member()
+    uint32_t offset;    // offset of member in structure
+};
+
+/**
+ * btf_find_struct_member - find a member of a structure by name
+ * 
+ * @btf: btf handle, e.g. the one returned by btf_load()
+ * @struct_name: name of struct
+ * @member_name: name of structure's member
+ * @return: NULL means error; get the error number from errno.
+ * 
+ * Note: Remember to free the returned struct member_attribute pointer
+ */
+struct member_attribute *btf_find_struct_member(struct btf *btf, char *struct_name, char *member_name);
+
+int btf_get_member_offset(struct btf *btf, char *name, char *member_name);
+void btf__free(struct btf *btf);
+
+#endif
+
diff --git a/source/tools/detect/mem/memcgoffline/include/memcg_iter.h b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h
new file mode 100644
index 0000000000000000000000000000000000000000..300a82b05d1281d70f154e6f407fe2388253acd6
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h
@@ -0,0 +1,36 @@
+#ifndef __MEMCG_ITER_H_
+#define __MEMCG_ITER_H_
+
+#include "btfparse.h"
+
+#define PATH_MAX        (2048)
+#define LEN             (255)
+#define CSS_DYING       (1 << 4)     /* css is dying */
+
+/* iterator function of "for_each_mem_cgroup" */
+unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev,
+                struct btf* handle);
+
+/* find out and set root_mem_cgroup from kallsyms*/
+int memcg_iter_init();
+
+/* Iterate over all memory cgroups from `start` (0 selects root_mem_cgroup); call memcg_iter_init() first */
+#define for_each_mem_cgroup(iter, start, btf)           \
+    for (iter = _mem_cgroup_iter(start, (unsigned long)NULL, btf);  \
+         iter != (unsigned long)NULL;              \
+         iter = _mem_cgroup_iter(start, iter, btf))
+
+/* 
+ * get member offset of certain struct, need to read from btf file,
+ * (don't call it in loop which may cause huge overhead)
+ */
+struct member_attribute *get_offset_no_cache(char *struct_name, 
+                            char *member_name, struct btf *handle);
+
+int get_member_offset(char *struct_name, char *member_name, 
+        struct btf *handle);
+
+void memcg_get_name(unsigned long memcg, char *name,
+                int len, struct btf *btf_handle);
+
+#endif
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/memcg_iter.c b/source/tools/detect/mem/memcgoffline/memcg_iter.c
new file mode 100644
index 0000000000000000000000000000000000000000..541d0367ffd805d37336b440cc367a4f1f57d582
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/memcg_iter.c
@@ -0,0 +1,291 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "memcg_iter.h"
+#include "kcore_utils.h"
+
+static unsigned long root_mem_cgroup;
+
+/* Look up struct_name.member_name through btf_find_struct_member() and return
+ * its attributes with `offset` divided by 8 (presumably bits to bytes), or
+ * NULL when the lookup fails. btfparse.h asks callers to free the result. */
+struct member_attribute *get_offset_no_cache(char *struct_name,
+                            char *member_name, struct btf *handle)
+{
+    struct member_attribute *attr =
+        btf_find_struct_member(handle, struct_name, member_name);
+
+    if (attr)
+        attr->offset /= 8;
+
+    return attr;
+}
+
+/* Byte offset of member_name in "struct <struct_name>"; negative on failure. */
+int get_member_offset(char *struct_name, char *member_name, struct btf *handle)
+{
+    char prefix[LEN];
+    int off = snprintf(prefix, sizeof(prefix), "struct %s", struct_name);
+    off = (size_t)off < sizeof(prefix) ? btf_get_member_offset(handle, prefix, member_name) : -1;
+    return off < 0 ? off : off / 8; /* dividing first would turn -1 .. -7 into 0 */
+}
+
+/*
+ * Return the css that follows `pos` in parent's children list (the first
+ * child when pos is 0), or 0 when the list is exhausted or a lookup fails.
+ */
+static unsigned long _css_next_child(unsigned long pos, unsigned long parent,
+                        struct btf *btf_handle)
+{
+    struct member_attribute *att;
+    unsigned long next = 0, sibling_off, head;
+
+    att = get_offset_no_cache("cgroup_subsys_state", "sibling", btf_handle);
+    if (!att)
+        return 0;
+    sibling_off = att->offset;
+    free(att);  /* btfparse.h: the caller owns the returned attribute */
+
+    att = get_offset_no_cache("cgroup_subsys_state", "children", btf_handle);
+    if (!att)
+        return 0;
+    head = parent + att->offset;  /* address of parent's children list head */
+    free(att);
+
+    kcore_readmem(pos ? pos + sibling_off : head, &next, sizeof(next));
+    /* a link back to the list head means there is no further sibling; a zero
+     * value (e.g. `next` left unset by a failed read) also ends the walk */
+    return (next && next != head) ? next - sibling_off : 0;
+}
+
+/*
+ * Pre-order successor of `prev` in the memcg tree rooted at `root`
+ * (0 selects root_mem_cgroup): first child if any, else the next sibling of
+ * the nearest ancestor that has one. Returns `root` when prev is 0 and 0
+ * when the walk is finished or a BTF lookup fails.
+ * root_mem_cgroup is filled in by memcg_iter_init().
+ */
+unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev,
+                struct btf *btf_handle)
+{
+    struct member_attribute *att;
+    unsigned long css_off, parent_off;
+    unsigned long css, root_css, parent;
+    unsigned long next;
+
+    if(!root)
+        root = root_mem_cgroup;
+    if(!prev)
+        return root;
+
+    att = get_offset_no_cache("mem_cgroup", "css", btf_handle);
+    if (!att)
+        return 0;
+    css_off = att->offset;
+    free(att);  /* btfparse.h: the caller owns the returned attribute */
+
+    att = get_offset_no_cache("cgroup_subsys_state", "parent", btf_handle);
+    if (!att)
+        return 0;
+    parent_off = att->offset;
+    free(att);
+
+    /* the css address is computed as memcg + offset, not read from memory */
+    css = prev + css_off;
+    root_css = root + css_off;
+
+    next = _css_next_child(0, css, btf_handle);
+    while (!next && css != root_css) {
+        /* no child: climb one level and try the sibling of where we were */
+        parent = 0;
+        kcore_readmem(css + parent_off, &parent, sizeof(parent));
+        if (!parent)
+            break;  /* failed read, or ran past the top without meeting root */
+        next = _css_next_child(css, parent, btf_handle);
+        css = parent;
+    }
+
+    if(!next)
+        return 0;
+
+    return next - css_off;
+}
+
+/* Resolve the kernel symbol "root_mem_cgroup" and read the 8-byte pointer
+ * stored there into root_mem_cgroup. Returns 0 on success, -1 otherwise. */
+int memcg_iter_init()
+{
+    unsigned long sym_addr = lookup_kernel_symbol("root_mem_cgroup");
+    size_t nread;
+
+    if (sym_addr == (unsigned long )-1) {
+        LOG_ERROR("unable to get root_mem_cgroup\n");
+        return -1;
+    }
+
+    nread = kcore_readmem(sym_addr, &root_mem_cgroup, 8);
+    if (nread >= 8)
+        return 0;
+
+    LOG_ERROR("get incorrect address where root_mem_cgroup point to\n");
+    return -1;
+}
+
+/* Step *buffer back namelen + off bytes and copy str there ('/' first when
+ * off is set); *buflen shrinks likewise. Returns -1 once it turns negative. */
+static int prepend(char **buffer, int *buflen, const char *str,
+            int namelen, int off)
+{
+    const int need = namelen + off;
+    if ((*buflen -= need) < 0) return -1;
+    *buffer -= need;
+    if (off) **buffer = '/';
+    memcpy(*buffer + off, str, namelen);
+    return 0;
+}
+
+/* Build the path of cgroup `cgrp` into buf by walking the parent links and
+ * prepending each cgroup name (cgroup.name -> cgroup_name.name); "/" for a
+ * cgroup without parent. Returns 0 on success, -1 when a BTF lookup fails
+ * or the path does not fit in buflen bytes. */
+static int cgroup_path(unsigned long cgrp, char *buf,
+            int buflen, struct btf *btf_handle)
+{
+    int ret  = -1;
+    char *start;
+    unsigned long cgp = 0;
+    char tmpname[PATH_MAX];
+    struct member_attribute *cg_pa_att, *cg_name_att = NULL;
+    struct member_attribute *cgn_name_attr = NULL;
+
+    cg_pa_att = get_offset_no_cache("cgroup", "parent", btf_handle);
+    if (!cg_pa_att)
+        goto out;
+    cg_name_att = get_offset_no_cache("cgroup", "name", btf_handle);
+    if (!cg_name_att)
+        goto out;
+    cgn_name_attr = get_offset_no_cache("cgroup_name", "name", btf_handle);
+    if (!cgn_name_attr)
+        goto out;
+
+    kcore_readmem(cgrp + cg_pa_att->offset, &cgp, sizeof(cgp));
+    if (!cgp) {
+        strncpy(buf, "/", buflen);
+        ret = 0;
+        goto out;
+    }
+
+    start = buf + buflen - 1;
+    *start = '\0';
+
+    do {
+        int len;
+        unsigned long name = 0;
+
+        kcore_readmem(cgrp + cg_name_att->offset, &name, sizeof(name));
+        name += cgn_name_attr->offset;
+        kcore_readmem(name, tmpname,sizeof(tmpname));
+        tmpname[sizeof(tmpname) - 1] = '\0';  /* copied bytes need not be terminated */
+
+        len = strlen(tmpname);
+        if (len > start - buf)  /* check first: never step the pointer below buf */
+            goto out;
+        start -= len;
+        memcpy(start, tmpname, len);
+        if (start == buf)
+            goto out;
+        *--start = '/';
+        cgrp = cgp;
+        kcore_readmem(cgp + cg_pa_att->offset, &cgp, sizeof(cgp));
+    } while (cgp);
+
+    ret = 0;
+    memmove(buf, start, buf + buflen - start);
+out:
+    free(cg_pa_att);  /* btfparse.h: the caller owns the returned attributes */
+    free(cg_name_att);
+    free(cgn_name_attr);
+    return ret;
+}
+
+/* Write the cgroupfs path of `memcg` ("/sys/fs/cgroup/memory" + cgroup path)
+ * into name, a buffer of len bytes. name starts out as an empty string, so
+ * an early return on failure never leaves it unset for the caller. */
+void memcg_get_name(unsigned long memcg, char *name,
+                int len, struct btf *btf_handle)
+{
+    char *end;
+    int pos;
+    unsigned long cg = 0, knname = 0;
+    char subname[257];
+    struct member_attribute *att;
+
+    if (len > 0)
+        name[0] = '\0';
+    memset(subname, 0, sizeof(subname));
+    att = get_offset_no_cache("mem_cgroup", "css", btf_handle);
+    if (!att)
+        return;
+    cg = memcg + att->offset;
+    free(att);  /* btfparse.h: the caller owns the returned attribute */
+
+    att = get_offset_no_cache("cgroup_subsys_state", "cgroup", btf_handle);
+    if (!att)
+        return;
+    kcore_readmem(cg + att->offset, &cg, sizeof(cg));
+    free(att);
+
+#ifdef LINUX_310
+    if (!cg)
+        return;
+    cgroup_path(cg, name, PATH_MAX, btf_handle);
+    end = name+strlen("/sys/fs/cgroup/memory/");
+    memmove(end, name, strlen(name)+1);
+    prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0);
+#else
+    unsigned long kn = 0;
+    unsigned long pkn;
+    int kn_name_offset, kn_pa_offset;
+    att = get_offset_no_cache("cgroup", "kn", btf_handle);
+    if (!att)
+        return;
+    kcore_readmem(cg + att->offset, &kn, sizeof(kn));
+    free(att);
+
+    if (!cg || !kn)
+        return;
+    kn_name_offset = get_member_offset("kernfs_node", "name", btf_handle);
+    if (kn_name_offset < 0)
+        return;
+    kn_pa_offset = get_member_offset("kernfs_node", "parent", btf_handle);
+    if (kn_pa_offset < 0)
+        return;
+
+    /* the path is assembled backwards from the end of the buffer */
+    end = name + len - 1;
+    prepend(&end, &len, "\0", 1, 0);
+    pkn = kn;
+
+    while (pkn) {
+        kcore_readmem(pkn + kn_name_offset, &knname, sizeof(knname));
+        kcore_readmem(knname, subname, sizeof(subname));
+        subname[sizeof(subname) - 1] = '\0';  /* copied bytes need not be terminated */
+        pos = prepend(&end, &len, subname, strlen(subname), 0);
+        if (pos)
+            break;
+        kcore_readmem(pkn + kn_pa_offset, &kn, sizeof(kn));
+        if ((pkn == kn) || !kn)
+            break;
+        pos = prepend(&end, &len, "/", 1, 0);
+        if (pos)
+            break;
+        pkn = kn;
+    }
+    prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0);
+    memmove(name, end, strlen(end) + 1);
+#endif
+}
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/memcgoffline.c b/source/tools/detect/mem/memcgoffline/memcgoffline.c
new file mode 100644
index 0000000000000000000000000000000000000000..16fe17db6a9a1a07ba9605ade1e676fabb2f28fd
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/memcgoffline.c
@@ -0,0 +1,187 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+#include <sys/types.h>
+
+#include "kcore_utils.h"
+#include "memcg_iter.h"
+
+static struct btf *btf_handle = NULL;
+int total_memcg_num = 0;
+
+struct environment {
+	int print_cg_num;                 /* offline memcg paths still allowed to be printed (-n) */
+} env = {
+	.print_cg_num = 10000,
+};
+
+/* Walk all memory cgroups from start_memcg (0 selects the root), count those
+ * whose css has CSS_DYING set and print path and refcount of up to
+ * env.print_cg_num of them. Returns that count, or -1 if a BTF lookup fails. */
+static int caculate_offline(unsigned long start_memcg)
+{
+    int offline_num = -1;
+    unsigned long css, iter = 0;
+    char fileName[PATH_MAX];
+    struct member_attribute *css_attr, *css_flag_attr = NULL;
+    struct member_attribute *refcnt_attr = NULL, *cnt_attr = NULL;
+
+    css_attr = get_offset_no_cache("mem_cgroup", "css", btf_handle);
+    if (!css_attr) {
+        LOG_ERROR("get css offset of mem_cgroup failed!\n");
+        goto out;
+    }
+    css_flag_attr = get_offset_no_cache("cgroup_subsys_state", "flags", btf_handle);
+    if (!css_flag_attr) {
+        LOG_ERROR("get flags offset of cgroup_subsys_state failed!\n");
+        goto out;
+    }
+    refcnt_attr = get_offset_no_cache("cgroup_subsys_state", "refcnt", btf_handle);
+    if (!refcnt_attr) {
+        LOG_ERROR("get refcnt offset of cgroup_subsys_state failed!\n");
+        goto out;
+    }
+    cnt_attr = get_offset_no_cache("percpu_ref", "count", btf_handle);
+    if (!cnt_attr) {
+        LOG_ERROR("get cnt offset of percpu_ref failed!\n");
+        goto out;
+    }
+
+    offline_num = 0;
+    for_each_mem_cgroup(iter, start_memcg, btf_handle) {
+        unsigned int flags_value = 0;  /* stay defined even if a read fails */
+        long refcnt_value = 0;
+        css = iter + css_attr->offset;
+        kcore_readmem(css + css_flag_attr->offset, &flags_value, sizeof(flags_value));
+        if (flags_value & CSS_DYING) {
+            offline_num++;
+            kcore_readmem(css + refcnt_attr->offset + cnt_attr->offset,
+                          &refcnt_value, sizeof(refcnt_value));
+            if (env.print_cg_num > 0) {
+                fileName[0] = '\0';  /* memcg_get_name() may return without writing */
+                memcg_get_name(iter, fileName, PATH_MAX, btf_handle);
+                printf("cgroup path:%s\trefcount=%ld\n", fileName, refcnt_value);
+                env.print_cg_num--;
+            }
+        }
+        total_memcg_num++;
+    }
+
+out:
+    free(css_attr);  /* btfparse.h: the caller owns the returned attributes */
+    free(css_flag_attr);
+    free(refcnt_attr);
+    free(cnt_attr);
+    return offline_num;
+}
+
+static void show_usage(char *prog) /* print the usage text to stderr, then exit; never returns */
+{
+	const char *str =
+	"   Usage: %s [OPTIONS]\n"
+	"   Options:\n"
+	"   -n PRINT_MAX_CG_NUM   Max offline memcg paths to printf(default 10000)\n"
+    "   -h HELP               help\n"
+    "   \n"
+    /* the single %s above is filled with prog by the fprintf below */
+    "   EXAMPLE:\n "
+    "   memcgoffline        # display number of offline memcg and all their paths.\n"
+    "   memcgoffline -n 10  # display number of offline memcg and "
+    "10 of offline memcg paths.\n"
+	;
+
+	fprintf(stderr, str, prog);
+	exit(EXIT_FAILURE); /* failure status is used for -h as well */
+}
+
+/* Parse the command line into *env: -n <num> sets print_cg_num, -h and any
+ * unknown option print the usage text and exit.
+ * Returns 0 on success, -EINVAL when the -n argument is not a valid int. */
+static int parse_args(int argc, char **argv, struct environment *env)
+{
+    int c, option_index;
+    char *end, *prog_name = "memcgoffline";
+    long num;
+
+    for (;;) {
+        c = getopt_long(argc, argv, "n:h", NULL, &option_index);
+        if (c == -1)
+            break;
+
+        if (c != 'n')
+            show_usage(prog_name);  /* 'h' and unknown options; would exit */
+
+        errno = 0;  /* strtol() reports errors via errno but never clears it */
+        num = strtol(optarg, &end, 10);
+        if (errno || end == optarg || *end != '\0' || (int)num != num)
+            return -EINVAL;
+        env->print_cg_num = (int)num;
+    }
+
+    return 0;
+}
+
+/* Load the BTF file located by prepare_btf_file(); NULL if no path is found
+ * (otherwise whatever btf_load() returns). */
+struct btf *btf_init()
+{
+    char *btf_path = prepare_btf_file();
+
+    if (btf_path)
+        return btf_load(btf_path);
+    return NULL;
+}
+
+void btf_uninit(struct btf *btf)
+{
+    btf__free(btf);  /* plain call: ISO C forbids `return expr;` in a void function */
+}
+
+int main(int argc, char *argp[]) /* note: argp here is the argument vector (usually named argv) */
+{
+	int offline_memcg = 0, ret = 0;
+
+	ret = parse_args(argc, argp, &env);
+    if (ret) {
+        LOG_ERROR("parse arg error!\n");
+        return -1;
+    }
+
+    btf_handle = btf_init(); /* used for every struct member offset lookup */
+    if (!btf_handle) {
+        LOG_ERROR("btf init failed!\n");
+        return -1;
+    }
+
+    ret = kcore_init(); /* must succeed before any kcore_readmem() call below */
+    if (ret) {
+        LOG_ERROR("kcore init failed!\n");
+        goto uninit_btf;
+    }
+
+    ret = memcg_iter_init(); /* locates root_mem_cgroup for the iteration */
+    if (ret) {
+        LOG_ERROR("memcg_iter_init failed!\n");
+        goto uninit_kcore;
+    }
+
+    offline_memcg = caculate_offline((unsigned long)NULL); /* 0: start at the root memcg */
+    if (offline_memcg < 0) {
+        LOG_ERROR("caculate offline memcg failed!\n");
+        ret = offline_memcg;
+        goto uninit_kcore;
+    }
+    printf("Offline memory cgroup num: %d\n", offline_memcg);
+    printf("Total memory cgroup num: %d\n", total_memcg_num);
+
+uninit_kcore: /* teardown runs in reverse order of initialization */
+    kcore_uninit();
+uninit_btf:
+    btf_uninit(btf_handle);
+
+    return ret;
+}
diff --git a/source/tools/detect/mem/podmem/entry/pagealloc.py b/source/tools/detect/mem/podmem/entry/pagealloc.py
index 4b6066b2860027a13f5fde1af09c99194b316afa..5e0ef2b16a4258fc80f5f245a82e28aadd907742 100644
--- a/source/tools/detect/mem/podmem/entry/pagealloc.py
+++ b/source/tools/detect/mem/podmem/entry/pagealloc.py
@@ -197,7 +197,8 @@ def get_info(meminfo, result,cid):
         meminfo["podinfo"][podname] = {}
         meminfo["podinfo"][podname]["podname"] = podname
         meminfo["podinfo"][podname]["podns"] = podns
-        meminfo["podinfo"][podname]["mem"] = 0
+        meminfo["podinfo"][podname]["rxmem"] = 0
+        meminfo["podinfo"][podname]["txmem"] = 0
     return podname
 
 def pagemem_scan(meminfo, ns):
@@ -241,7 +242,10 @@ def pagemem_check(meminfo,ns):
             pid = info[1]
             task_pid = task+"-"+pid
             rx = int(line_list[2])
-            tx = int(line_list[3])
+            if line.find("LISTEN") >= 0:
+                tx = 0
+            else:
+                tx = int(line_list[3])
             rx_mem += rx
             tx_mem += tx
             if task_pid not in memTask.keys():
diff --git a/source/tools/detect/mem/podmem/entry/podmem.py b/source/tools/detect/mem/podmem/entry/podmem.py
index 1ce46eae7e14a81ae82a4a921220b4e3d6777b8a..8b875a2053f59fa97192c8a33d951d7124072b6f 100644
--- a/source/tools/detect/mem/podmem/entry/podmem.py
+++ b/source/tools/detect/mem/podmem/entry/podmem.py
@@ -412,7 +412,7 @@ def handle_args(podinfo, argv):
             print("-j: dump result to json file (sysak podmem -s -j ./test.json)")
             print("-r: set sample rate ,default set to 1 (sysak podmem -s -r 2)")
             print("-t: output filecache top ,default for top 10 (sysak podmem -s -t 20)")
-            print("-m: analysis pod recv-Q memory")
+            print("-m: analysis pod recv-Q  and send-Q memory")
             sys.exit(2) 
         elif opt == '-r':
             cmdline['rate'] = int(arg)
diff --git a/source/tools/monitor/oomkill/kill.c b/source/tools/monitor/oomkill/kill.c
index 7000dc48b155a20972d54314a86b4745047d6e53..11e88db622c6ff94db08220c504493ffbe618b44 100644
--- a/source/tools/monitor/oomkill/kill.c
+++ b/source/tools/monitor/oomkill/kill.c
@@ -427,6 +427,8 @@ procinfo_t find_largest_process(const poll_loop_args_t* args)
  */
 void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victim)
 {
+    char cmdline[512];
+
     if (victim->pid <= 0) {
         warn("Could not find a process to kill. Sleeping 1 second.\n");
         if (args->notify) {
@@ -444,10 +446,12 @@ void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victi
     } else if (sig == 0) {
         sig_name = "0 (no-op signal)";
     }
+    get_cmdline(victim->pid, cmdline, sizeof(cmdline));
     // sig == 0 is used as a self-test during startup. Don't notify the user.
     if (sig != 0 || enable_debug) {
         warn("sending %s to process %d uid %d \"%s\": badness %ld, VmRSS %lld MiB adj:%d\n",
             sig_name, victim->pid, victim->uid, victim->name, victim->badness, victim->VmRSSkiB / 1024, victim->oom_score_adj);
+        warn("process cmdline:%s\n", cmdline);
     }
 
     int res = kill_wait(args, victim->pid, sig);
diff --git a/source/tools/monitor/oomkill/meminfo.c b/source/tools/monitor/oomkill/meminfo.c
index 4f02e886208b9cc47a9d896b708b578d86fcabf6..18bab6b1e072aa2e71431ba7ef42c673164fb67b 100644
--- a/source/tools/monitor/oomkill/meminfo.c
+++ b/source/tools/monitor/oomkill/meminfo.c
@@ -11,6 +11,9 @@
 #include <string.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 #include "globals.h"
 #include "meminfo.h"
@@ -253,6 +256,44 @@ int get_comm(int pid, char* out, size_t outlen)
     return 0;
 }
 
+/* Read <procdir_path>/<pid>/cmdline into out as a single space-separated string.
+ * After argument validation out is always NUL-terminated ("" on failure), so it stays
+ * printable even if the return value is ignored. Returns 0 on success, -1 on error. */
+int get_cmdline(int pid, char *out, int out_len) {
+    char cmdline_file[128] = {0};
+    int fd, i;
+    ssize_t nread, total = 0;
+
+    if (out == NULL || out_len <= 0) {
+        return -1;
+    }
+    out[0] = '\0';
+    snprintf(cmdline_file, sizeof(cmdline_file), "%s/%d/cmdline", procdir_path, pid);
+    fd = open(cmdline_file, O_RDONLY);
+    if (fd == -1) {
+        printf("Failed to open %s\n", cmdline_file);
+        return -1;
+    }
+    // Reading at most out_len - total - 1 bytes keeps one byte free for the final '\0'.
+    while ((nread = read(fd, out + total, out_len - total - 1)) > 0) {
+        total += nread;
+    }
+    close(fd);
+    if (nread < 0) {
+        printf("Failed to read %s\n", cmdline_file);
+        out[0] = '\0';
+        return -1;
+    }
+    // Arguments in the file are separated by '\0'; replace them with spaces for printing.
+    for (i = 0; i < total; i++) {
+        if (out[i] == '\0') {
+            out[i] = ' ';
+        }
+    }
+    out[total] = '\0';
+    return 0;
+}
+
 // Get the effective uid (EUID) of `pid`.
 // Returns the uid (>= 0) or -errno on error.
 int get_uid(int pid)
diff --git a/source/tools/monitor/oomkill/meminfo.h b/source/tools/monitor/oomkill/meminfo.h
index 22df1ef5af066bc7063138c140fe49ceabe3a732..4572b402065d40e874c9f478a0a9632c947cca6f 100644
--- a/source/tools/monitor/oomkill/meminfo.h
+++ b/source/tools/monitor/oomkill/meminfo.h
@@ -61,6 +61,7 @@ int get_oom_score(int pid);
 int get_oom_score_adj(const int pid, int* out);
 long long get_vm_rss_kib(int pid);
 int get_comm(int pid, char* out, size_t outlen);
+int get_cmdline(int pid, char *out, int out_len);
 int get_uid(int pid);
 
 #endif
diff --git a/source/tools/monitor/unity/collector/loop.lua b/source/tools/monitor/unity/collector/loop.lua
index 47e793ded8ae19fe1eb7a5740ca2b9d2206d828f..68b19a557bd4eedd50ed18326bd38de9c4e89ba6 100644
--- a/source/tools/monitor/unity/collector/loop.lua
+++ b/source/tools/monitor/unity/collector/loop.lua
@@ -16,7 +16,7 @@ local CguardDaemon = require("collector.guard.guardDaemon")
 local CguardSelfStat = require("collector.guard.guardSelfStat")
 local CpostPlugin = require("collector.postPlugin.postPlugin")
 local CforkRun = require("collector.execEngine.forkRun")
----local CpodFilter = require("collector.podMan.podFilter")
+local CpodFilter = require("collector.podMan.podFilter")
 local CpodsAll = require("collector.podMan.podsAll")
 local Cloop = class("loop")
 
@@ -52,11 +52,15 @@ function Cloop:loadLuaPlugin(res, proc_path, procffi)
         end
     end
     if res.container then
-        ---self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path)
-        ---self._names[c] = "podFilter"
-        self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path)
-        self._names[c] = "podMon"
-
+        if res.container.mode == "cgroup" then
+	    --print("mods1="..res.container.mode)
+            self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path)
+            self._names[c] = "podFilter"
+        else
+	    --print("mods2="..res.container.mode)
+            self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path)
+            self._names[c] = "podMon"
+        end
     end
     print("add " .. system:keyCount(self._procs) .. " lua plugin.")
 end
diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h
index f42a07526cf9de13fde141c620607b025c5c5a42..e3cc818b1fcbfeaa908ec4549a5d1d213a731176 100644
--- a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h
+++ b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h
@@ -1,5 +1,5 @@
-#ifndef UNITY_SAMPLE_H
-#define UNITY_SAMPLE_H
+#ifndef UNITY_IMC_LATENCY_H
+#define UNITY_IMC_LATENCY_H
 
 #include <linux/types.h>
 #include <stdbool.h>
@@ -123,4 +123,4 @@ struct topology_ent {
     int64_t socket_id;
 };
 
-#endif  // UNITY_SAMPLE_H
+#endif  // UNITY_IMC_LATENCY_H
diff --git a/source/tools/monitor/unity/collector/proc_stat.lua b/source/tools/monitor/unity/collector/proc_stat.lua
index e5834d75d8f3ad3d49670a55bb71205005fa3b78..dbdec60beadc78c5404c8dedf41e94cc57e299ef 100644
--- a/source/tools/monitor/unity/collector/proc_stat.lua
+++ b/source/tools/monitor/unity/collector/proc_stat.lua
@@ -14,6 +14,9 @@ function CprocStat:_init_(proto, pffi, mnt, pFile)
     CvProc._init_(self, proto, pffi, mnt,pFile or "proc/stat")
     self._funs = self:setupTable()
     self._cpuArr = {}
+    self._total_warn = 0
+    self._sys_warn = 0
+    self._user_warn = 0
 end
 
 function CprocStat:_cpuHead()
@@ -23,6 +26,12 @@ end
 
 function CprocStat:_procCpu(now, last)
     if last then
+        local user_thresh = 40   -- percent; compared against user + nice
+        local sys_thresh = 25    -- percent; compared against sys + softirq
+        local total_thresh = 55  -- percent
+        local user_util = 0
+        local sys_util = 0
+        local total_util = 0     -- declared local so it does not leak into the global table; set to 100 - idle%
         local vs = {}
         local sum = 0
         local index = self:_cpuHead()
@@ -37,10 +46,39 @@ function CprocStat:_procCpu(now, last)
             local total = tonumber(sum)
             for i = 1, #vs do
                 local v = tonumber(vs[i])
+
+		--for warn events
+		if index[i] == "user" or index[i] == "nice" then
+			user_util = user_util + v*100.0/total
+		end
+		if index[i] == "sys" or index[i] == "softirq" then
+			sys_util = sys_util + v*100.0/total
+		end
+		if index[i] == "idle" then
+			total_util = 100 - (v*100.0/total)
+		end
+
                 local cell = {name=index[i], value=tonumber(v * 100.0 / total)}
                 table.insert(res, cell)
             end
             table.insert(res, {name="total", value=total})
+            -- warn events: each counter grows by one per sample whose utilization exceeds its threshold
+            if user_util > user_thresh then
+                self._user_warn = self._user_warn + 1
+            end
+            local cell0 = {name="usr_warn", value=self._user_warn}
+            table.insert(res, cell0)
+            if sys_util > sys_thresh then
+                self._sys_warn = self._sys_warn + 1
+            end
+            local cell1 = {name="sys_warn", value=self._sys_warn}
+            table.insert(res, cell1)
+            if total_util > total_thresh then
+                self._total_warn = self._total_warn + 1
+            end
+            local cell2 = {name="total_warn", value=self._total_warn}
+            table.insert(res, cell2)
+
             return res
         end
     end
diff --git a/source/tools/monitor/unity/etc/base.yaml b/source/tools/monitor/unity/etc/base.yaml
index c784e6423989083471f048adcbc76669d7e6cd32..9350c344463efd2157cbe1163a849c6e24a1c578 100644
--- a/source/tools/monitor/unity/etc/base.yaml
+++ b/source/tools/monitor/unity/etc/base.yaml
@@ -49,92 +49,92 @@ plugins:
 
 metrics:
   -
-    title: sysak_proc_cpu_total
+    title: sysom_proc_cpu_total
     from: cpu_total
     head: mode
     help: "cpu usage info for total."
     type: "gauge"
-  - title: sysak_proc_cpus
+  - title: sysom_proc_cpus
     from: cpus
     head: mode
     help: "cpu usage info for per-cpu."
     type: "gauge"
-  - title: sysak_proc_sirq
+  - title: sysom_proc_sirq
     from: sirq
     head: type
     help: "system soft irq times."
     type: "gauge"
-  - title: sysak_proc_stat_counters
+  - title: sysom_proc_stat_counters
     from: stat_counters
     head: counter
     help: "system state counter."
     type: "gauge"
-  - title: sysak_proc_meminfo
+  - title: sysom_proc_meminfo
     from: meminfo
     head: value
     help: "meminfo from /proc/meminfo."
     type: "gauge"
-  - title: sysak_proc_vmstat
+  - title: sysom_proc_vmstat
     from: vmstat
     head: value
     help: "vmstat info from /proc/vmstat."
     type: "gauge"
-  - title: sysak_proc_self_statm
+  - title: sysom_proc_self_statm
     from: self_statm
     head: value
     help: "statm info from /proc/self/statm."
     type: "gauge"
-  - title: sysak_proc_networks
+  - title: sysom_proc_networks
     from: networks
     head: counter
     help: "networks info from /proc/net/dev."
     type: "gauge"
-  - title: sysak_proc_disks
+  - title: sysom_proc_disks
     from: disks
     head: counter
     help: "disk info from /proc/diskstats."
     type: "gauge"
-  - title: sysak_proc_pkt_status
+  - title: sysom_proc_pkt_status
     from: pkt_status
     head: counter
     help: "net status info from /proc/net/snmp and /proc/net/status."
     type: "gauge"
-  - title: sysak_fs_stat
+  - title: sysom_fs_stat
     from: fs_stat
     head: counter
     help: "file system information."
     type: "gauge"
-  - title: sysak_sock_stat
+  - title: sysom_sock_stat
     from: sock_stat
     head: value
     help: "sock stat counters from /proc/net/sockstat"
     type: "gauge"
-  - title: sysak_proc_schedstat
+  - title: sysom_proc_schedstat
     from: proc_schedstat
     head: value
     help: "schedule state of percpu."
     type: "gauge"
-  - title: sysak_proc_loadavg
+  - title: sysom_proc_loadavg
     from: proc_loadavg
     head: value
     help: "loadavg of system from /proc/loadavg"
     type: "gauge"
-  - title: sysak_proc_buddyinfo
+  - title: sysom_proc_buddyinfo
     from: buddyinfo
     head: value
     help: "buddyinfo of system from /proc/buddyinfo"
     type: "gauge"
-  - title: sysak_IOMonIndForDisksIO
+  - title: sysom_IOMonIndForDisksIO
     from: IOMonIndForDisksIO
     head: value
     help: "Disk IO indicators and abnormal events"
     type: "gauge"
-  - title: sysak_IOMonIndForSystemIO
+  - title: sysom_IOMonIndForSystemIO
     from: IOMonIndForSystemIO
     head: value
     help: "System indicators and abnormal events about IO"
     type: "gauge"
-  - title: sysak_IOMonDiagLog
+  - title: sysom_IOMonDiagLog
     from: IOMonDiagLog
     head: value
     help: "Diagnose log for IO exception"
@@ -144,87 +144,87 @@ metrics:
     head: value
     help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling"
     type: "gauge"
-  - title: sysak_cpu_dist
+  - title: sysom_cpu_dist
     from: cpu_dist
     head: value
     help: "task cpu sched dist."
     type: "gauge"
-  - title: sysak_net_health_hist
+  - title: sysom_net_health_hist
     from: net_health_hist
     head: value
     help: "net_health_hist"
     type: "gauge"
-  - title: sysak_net_health_count
+  - title: sysom_net_health_count
     from: net_health_count
     head: value
     help: "net_health_count"
     type: "gauge"
-  - title: sysak_net_retrans_count
+  - title: sysom_net_retrans_count
     from: net_retrans_count
     head: value
     help: "net_retrans_count"
     type: "gauge"
-  - title: sysak_gpuinfo
+  - title: sysom_gpuinfo
     from: gpuinfo
     head: value
     help: "gpuinfo of system from nvidia-smi"
     type: "gauge"
-  - title: sysak_uname
+  - title: sysom_uname
     from: uname
     head: value
     help: "uname info"
     type: "gauge"
-  - title: sysak_uptime
+  - title: sysom_uptime
     from: uptime
     head: value
     help: "uptime from /proc/uptime"
     type: "gauge"
-  - title: sysak_system_release
+  - title: sysom_system_release
     from: system_release
     head: value
     help: "system_release from /etc/os-release"
     type: "gauge"
-  - title: sysak_cgroups
+  - title: sysom_cgroups
     from: cgroups
     head: value
     help: "cgroup number."
     type: "gauge"
-  - title: sysak_per_sirqs
+  - title: sysom_per_sirqs
     from: per_sirqs
     head: value
     help: "per_sirqs."
     type: "gauge"
-  - title: sysak_softnets
+  - title: sysom_softnets
     from: softnets
     head: value
     help: "cgroup number."
     type: "gauge"
-  - title: sysak_interrupts
+  - title: sysom_interrupts
     from: interrupts
     head: value
     help: "interrupts."
     type: "gauge"
-  - title: sysak_net_ip_count
+  - title: sysom_net_ip_count
     from: net_ip_count
     head: value
     help: "net snmp net_ip_count"
     type: "gauge"
-  - title: sysak_net_icmp_count
+  - title: sysom_net_icmp_count
     from: net_icmp_count
     head: value
     help: "net snmp net_icmp_count"
     type: "gauge"
-  - title: sysak_net_udp_count
+  - title: sysom_net_udp_count
     from: net_udp_count
     head: value
     help: "net snmp net_udp_count"
     type: "gauge"
-  - title: sysak_net_tcp_count
+  - title: sysom_net_tcp_count
     from: net_tcp_count
     head: value
     help: "net snmp net_tcp_count"
     type: "gauge"
-  - title: sysak_net_tcp_ext_count
+  - title: sysom_net_tcp_ext_count
     from: net_tcp_ext_count
     head: value
     help: "net stat net_tcp_ext_count"
diff --git a/source/tools/monitor/unity/etc/group.yaml b/source/tools/monitor/unity/etc/group.yaml
index 3208ecc2089cf7b950584ddd1448c0af01955678..6e3d830285bb20d511982ee766d76a5f1ec6cf5c 100644
--- a/source/tools/monitor/unity/etc/group.yaml
+++ b/source/tools/monitor/unity/etc/group.yaml
@@ -62,33 +62,33 @@ plugins:
     description: "summary retrans out put."
 
 metrics:
-  - title: sysak_proc_pkt_status
+  - title: sysom_proc_pkt_status
     from: pkt_status
     head: counter
     help: "net status info from /proc/net/snmp and /proc/net/status."
     type: "gauge"
-  - title: sysak_net_health_hist
+  - title: sysom_net_health_hist
     from: net_health_hist
     head: value
     help: "net_health_hist"
     type: "gauge"
-  - title: sysak_net_health_count
+  - title: sysom_net_health_count
     from: net_health_count
     head: value
     help: "net_health_count"
     type: "gauge"
-  - title: sysak_net_retrans_count
+  - title: sysom_net_retrans_count
     from: net_retrans_count
     head: value
     help: "net_retrans_count"
     type: "gauge"
-  - title: sysak_virtout_dist
+  - title: sysom_virtout_dist
     from: virtout_dist
     head: value
-    help: "sysak_virtout_dist"
+    help: "sysom_virtout_dist"
     type: "gauge"
-  - title: sysak_retrans
+  - title: sysom_retrans
     from: retrans
     head: value
-    help: "sysak_retrans"
+    help: "sysom_retrans"
     type: "gauge"
\ No newline at end of file
diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml
index f73d003b926edac23391bc3505c4930c01c71ddc..bddd986428f66b37318da80e98e8c08c43f5dc90 100644
--- a/source/tools/monitor/unity/etc/k8s.yaml
+++ b/source/tools/monitor/unity/etc/k8s.yaml
@@ -25,7 +25,11 @@ outline:
 
 container:
   mode: "pods"
-  luaPlugin: ["cg_cpu_cfs_quota","cg_mem_drcm_glob_latency","cg_memory_util","cg_cpu_stat_sample", "cg_cpuacct_stat","cg_memory_drcm_latency", "cg_memory_fail_cnt","cg_memory_dcmp_latency"]
+  #mode:"cgroup"
+  #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat
+  luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency",
+             "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat",
+             "cg_cpu_stat", "cg_pmu_events", "cg_cpu_cfs_quota", "cg_mem_drcm_glob_latency"]
   directCgPath:
     - "/"
     - "/kubepods.slice"
@@ -277,13 +281,12 @@ metrics:
     head: value
     help: "sysom_cg_memory_util"
     type: "gauge"
-  - title: sysom_cg_memgdrcm_latency
+  - title: sysom_cg_mem_glob_drcm_latency
     from: cgGlbDrcmLatency
     head: value
     help: "sysom global memory latency"
     type: "gauge"
-
-  - title: sysom_cg_memdrcm_latency
+  - title: sysom_cg_mem_drcm_latency
     from: cg_memdrcm_latency
     head: value
     help: "sysom_cg_memdrcm_latency"
@@ -293,6 +296,26 @@ metrics:
     head: value
     help: "sysom_cg_memmcmp_latency"
     type: "gauge"
+  - title: sysom_cg_mem_dcmp_latency
+    from: cg_memdcmp_latency
+    head: value
+    help: "sysom_cg_mem_dcmp_latency"
+    type: "gauge"
+  - title: sysom_cg_cpuacct_wait_latency
+    from: cg_wait_latency
+    head: value
+    help: "sysom_cg_cpuacct_wait_latency"
+    type: "gauge"
+  - title: sysom_cg_cpuacct_proc_stat
+    from: cg_cpuacct_proc_stat
+    head: value
+    help: "sysom_cg_cpuacct_proc_stat"
+    type: "gauge"
+  - title: sysom_cg_cpu_quota
+    from: cgCpuQuota
+    head: value
+    help: "quota_us,peroid_us and quota/period"
+    type: "gauge"
   - title: sysom_cg_cpu_stat
     from: cg_cpu_stat
     head: value
@@ -303,8 +326,8 @@ metrics:
     head: value
     help: "cpuacct/cpuacct.stat"
     type: "gauge"
-  - title: sysom_cg_cfs_quota
-    from: cgCpuQuota
+  - title: sysom_cg_pmu_events
+    from: pmu_cg_events
     head: value
-    help: "cfs quota"
+    help: "pmu events of cgroups"
     type: "gauge"