From 3615213715d544a1c511650f9c171468b1820272 Mon Sep 17 00:00:00 2001
From: Hailong Liu <liuhailong@linux.alibaba.com>
Date: Thu, 13 Jul 2023 17:30:29 +0800
Subject: [PATCH 01/16] unity/etc: Add some metrics for k8s.yaml

Signed-off-by: Hailong Liu <liuhailong@linux.alibaba.com>
---
 source/tools/monitor/unity/etc/k8s.yaml | 36 ++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml
index 19e6059e..8b9f1690 100644
--- a/source/tools/monitor/unity/etc/k8s.yaml
+++ b/source/tools/monitor/unity/etc/k8s.yaml
@@ -25,7 +25,10 @@ outline:
 
 container:
   mode: "pods"
-  luaPlugin: ["cg_cpu_cfs_quota","cg_mem_drcm_glob_latency","cg_memory_util","cg_cpu_stat_sample", "cg_cpuacct_stat","cg_memory_drcm_latency", "cg_memory_fail_cnt","cg_memory_dcmp_latency"]
+  #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat
+  luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency",
+             "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat",
+             "cg_cpu_stat", "cg_pmu_events", "cg_cpu_cfs_quota", "cg_mem_drcm_glob_latency"]
   directCgPath:
     - "/"
     - "/kubepods.slice"
@@ -277,13 +280,12 @@ metrics:
     head: value
     help: "sysom_cg_memory_util"
     type: "gauge"
-  - title: sysom_cg_memgdrcm_latency
+  - title: sysak_cg_mem_glob_drcm_latency
     from: cgGlbDrcmLatency
     head: value
     help: "sysom global memory latency"
     type: "gauge"
-
-  - title: sysom_cg_memdrcm_latency
+  - title: sysom_cg_mem_drcm_latency
     from: cg_memdrcm_latency
     head: value
     help: "sysom_cg_memdrcm_latency"
@@ -293,6 +295,26 @@ metrics:
     head: value
     help: "sysom_cg_memmcmp_latency"
     type: "gauge"
+  - title: sysak_cg_mem_dcmp_latency
+    from: cg_memdcmp_latency
+    head: value
+    help: "sysak_cg_mem_dcmp_latency"
+    type: "gauge"
+  - title: sysak_cg_cpuacct_wait_latency
+    from: cg_wait_latency
+    head: value
+    help: "sysak_cg_cpuacct_wait_latency"
+    type: "gauge"
+  - title: sysak_cg_cpuacct_proc_stat
+    from: cg_cpuacct_proc_stat
+    head: value
+    help: "sysak_cg_cpuacct_proc_stat"
+    type: "gauge"
+  - title: sysak_cg_cpu_quota
+    from: cgCpuQuota
+    head: value
+    help: "quota_us,period_us and quota/period"
+    type: "gauge"
   - title: sysom_cg_cpu_stat
     from: cg_cpu_stat
     head: value
@@ -303,8 +325,8 @@ metrics:
     head: value
     help: "cpuacct/cpuacct.stat"
     type: "gauge"
-  - title: sysom_cg_cfs_quota
-    from: cgCpuQuota
+  - title: sysak_cg_pmu_events
+    from: pmu_cg_events
     head: value
-    help: "cfs quota"
+    help: "pmu events of cgroups"
     type: "gauge"
-- 
Gitee


From 7eab513442e296a34a87ba2da38a28e9b42194ac Mon Sep 17 00:00:00 2001
From: zhilan <xwliu@linux.alibaba.com>
Date: Fri, 14 Jul 2023 16:06:49 +0800
Subject: [PATCH 02/16] podmem: pagealloc: fix bugs in tx

---
 source/tools/detect/mem/podmem/entry/pagealloc.py | 8 ++++++--
 source/tools/detect/mem/podmem/entry/podmem.py    | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/source/tools/detect/mem/podmem/entry/pagealloc.py b/source/tools/detect/mem/podmem/entry/pagealloc.py
index 4b6066b2..5e0ef2b1 100644
--- a/source/tools/detect/mem/podmem/entry/pagealloc.py
+++ b/source/tools/detect/mem/podmem/entry/pagealloc.py
@@ -197,7 +197,8 @@ def get_info(meminfo, result,cid):
         meminfo["podinfo"][podname] = {}
         meminfo["podinfo"][podname]["podname"] = podname
         meminfo["podinfo"][podname]["podns"] = podns
-        meminfo["podinfo"][podname]["mem"] = 0
+        meminfo["podinfo"][podname]["rxmem"] = 0
+        meminfo["podinfo"][podname]["txmem"] = 0
     return podname
 
 def pagemem_scan(meminfo, ns):
@@ -241,7 +242,10 @@ def pagemem_check(meminfo,ns):
             pid = info[1]
             task_pid = task+"-"+pid
             rx = int(line_list[2])
-            tx = int(line_list[3])
+            if line.find("LISTEN") >= 0:
+                tx = 0
+            else:
+                tx = int(line_list[3])
             rx_mem += rx
             tx_mem += tx
             if task_pid not in memTask.keys():
diff --git a/source/tools/detect/mem/podmem/entry/podmem.py b/source/tools/detect/mem/podmem/entry/podmem.py
index 1ce46eae..8b875a20 100644
--- a/source/tools/detect/mem/podmem/entry/podmem.py
+++ b/source/tools/detect/mem/podmem/entry/podmem.py
@@ -412,7 +412,7 @@ def handle_args(podinfo, argv):
             print("-j: dump result to json file (sysak podmem -s -j ./test.json)")
             print("-r: set sample rate ,default set to 1 (sysak podmem -s -r 2)")
             print("-t: output filecache top ,default for top 10 (sysak podmem -s -t 20)")
-            print("-m: analysis pod recv-Q memory")
+            print("-m: analysis pod recv-Q  and send-Q memory")
             sys.exit(2) 
         elif opt == '-r':
             cmdline['rate'] = int(arg)
-- 
Gitee


From b9cc16b27f1ba21affef51da2a137462bce24dca Mon Sep 17 00:00:00 2001
From: Hailong Liu <liuhailong@linux.alibaba.com>
Date: Fri, 14 Jul 2023 17:31:05 +0800
Subject: [PATCH 03/16] unity/podMan: Provide an alternative way for pods or
 cgroup

Signed-off-by: Hailong Liu <liuhailong@linux.alibaba.com>
---
 source/tools/monitor/unity/collector/loop.lua | 16 ++++++++++------
 source/tools/monitor/unity/etc/k8s.yaml       |  1 +
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/source/tools/monitor/unity/collector/loop.lua b/source/tools/monitor/unity/collector/loop.lua
index 47e793de..68b19a55 100644
--- a/source/tools/monitor/unity/collector/loop.lua
+++ b/source/tools/monitor/unity/collector/loop.lua
@@ -16,7 +16,7 @@ local CguardDaemon = require("collector.guard.guardDaemon")
 local CguardSelfStat = require("collector.guard.guardSelfStat")
 local CpostPlugin = require("collector.postPlugin.postPlugin")
 local CforkRun = require("collector.execEngine.forkRun")
----local CpodFilter = require("collector.podMan.podFilter")
+local CpodFilter = require("collector.podMan.podFilter")
 local CpodsAll = require("collector.podMan.podsAll")
 local Cloop = class("loop")
 
@@ -52,11 +52,15 @@ function Cloop:loadLuaPlugin(res, proc_path, procffi)
         end
     end
     if res.container then
-        ---self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path)
-        ---self._names[c] = "podFilter"
-        self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path)
-        self._names[c] = "podMon"
-
+        if res.container.mode == "cgroup" then
+	    --print("mods1="..res.container.mode)
+            self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path)
+            self._names[c] = "podFilter"
+        else
+	    --print("mods2="..res.container.mode)
+            self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path)
+            self._names[c] = "podMon"
+        end
     end
     print("add " .. system:keyCount(self._procs) .. " lua plugin.")
 end
diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml
index 8b9f1690..458b90f4 100644
--- a/source/tools/monitor/unity/etc/k8s.yaml
+++ b/source/tools/monitor/unity/etc/k8s.yaml
@@ -25,6 +25,7 @@ outline:
 
 container:
   mode: "pods"
+  #mode:"cgroup"
   #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat
   luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency",
              "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat",
-- 
Gitee


From 09283528f3861e144ed757295fcb1edb74b6c53d Mon Sep 17 00:00:00 2001
From: yinbinbin <yinbinbin001@linux.alibaba.com>
Date: Fri, 14 Jul 2023 21:17:26 +0800
Subject: [PATCH 04/16] oomkill: print cmdline

Signed-off-by: yinbinbin <yinbinbin001@linux.alibaba.com>
---
 source/tools/monitor/oomkill/kill.c    |  4 +++
 source/tools/monitor/oomkill/meminfo.c | 41 ++++++++++++++++++++++++++
 source/tools/monitor/oomkill/meminfo.h |  1 +
 3 files changed, 46 insertions(+)

diff --git a/source/tools/monitor/oomkill/kill.c b/source/tools/monitor/oomkill/kill.c
index 7000dc48..11e88db6 100644
--- a/source/tools/monitor/oomkill/kill.c
+++ b/source/tools/monitor/oomkill/kill.c
@@ -427,6 +427,8 @@ procinfo_t find_largest_process(const poll_loop_args_t* args)
  */
 void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victim)
 {
+    char cmdline[512];
+
     if (victim->pid <= 0) {
         warn("Could not find a process to kill. Sleeping 1 second.\n");
         if (args->notify) {
@@ -444,10 +446,12 @@ void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victi
     } else if (sig == 0) {
         sig_name = "0 (no-op signal)";
     }
+    get_cmdline(victim->pid, cmdline, sizeof(cmdline));
     // sig == 0 is used as a self-test during startup. Don't notify the user.
     if (sig != 0 || enable_debug) {
         warn("sending %s to process %d uid %d \"%s\": badness %ld, VmRSS %lld MiB adj:%d\n",
             sig_name, victim->pid, victim->uid, victim->name, victim->badness, victim->VmRSSkiB / 1024, victim->oom_score_adj);
+        warn("process cmdline:%s\n", cmdline);
     }
 
     int res = kill_wait(args, victim->pid, sig);
diff --git a/source/tools/monitor/oomkill/meminfo.c b/source/tools/monitor/oomkill/meminfo.c
index 4f02e886..18bab6b1 100644
--- a/source/tools/monitor/oomkill/meminfo.c
+++ b/source/tools/monitor/oomkill/meminfo.c
@@ -11,6 +11,9 @@
 #include <string.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 #include "globals.h"
 #include "meminfo.h"
@@ -253,6 +256,44 @@ int get_comm(int pid, char* out, size_t outlen)
     return 0;
 }
 
+// Read <procdir>/<pid>/cmdline into `out`, replacing the '\0' argument
+// separators with spaces. `out` always ends up a valid (possibly empty) C
+// string, so callers may print it on failure. Returns 0, or -1 on error.
+int get_cmdline(int pid, char *out, int out_len) {
+    char cmdline_file[128] = {0};
+    int fd, i;
+    ssize_t nread, total = 0;
+
+    if (out == NULL || out_len <= 0) return -1;
+    out[0] = '\0';
+
+    snprintf(cmdline_file, sizeof(cmdline_file), "%s/%d/cmdline",
+             procdir_path, pid);
+    fd = open(cmdline_file, O_RDONLY);
+    if (fd == -1) {
+        printf("Failed to open %s\n", cmdline_file);
+        return -1;
+    }
+
+    while ((nread = read(fd, out + total, out_len - total - 1)) > 0) {
+        total += nread;
+    }
+    close(fd);
+
+    // A read error shows up in nread; `total` only ever grows, so test nread.
+    if (nread < 0) {
+        out[0] = '\0';
+        printf("Failed to read %s\n", cmdline_file);
+        return -1;
+    }
+
+    for (i = 0; i < total; i++) {
+        if (out[i] == '\0') out[i] = ' ';
+    }
+    out[total] = '\0';
+    return 0;
+}
+
 // Get the effective uid (EUID) of `pid`.
 // Returns the uid (>= 0) or -errno on error.
 int get_uid(int pid)
diff --git a/source/tools/monitor/oomkill/meminfo.h b/source/tools/monitor/oomkill/meminfo.h
index 22df1ef5..4572b402 100644
--- a/source/tools/monitor/oomkill/meminfo.h
+++ b/source/tools/monitor/oomkill/meminfo.h
@@ -61,6 +61,7 @@ int get_oom_score(int pid);
 int get_oom_score_adj(const int pid, int* out);
 long long get_vm_rss_kib(int pid);
 int get_comm(int pid, char* out, size_t outlen);
+int get_cmdline(int pid, char *out, int out_len);
 int get_uid(int pid);
 
 #endif
-- 
Gitee


From 9def63213b5c636582af83b130a3a9a2de1e1e63 Mon Sep 17 00:00:00 2001
From: stephanie <1165643831@qq.com>
Date: Mon, 17 Jul 2023 06:13:20 +0000
Subject: [PATCH 05/16] !708 remove /.idea & **/.DS_Store, modify .gitignore *
 modify .gitignore * remove /.idea & **/.DS_Store, modify .gitignore

---
 .DS_Store                      | Bin 6148 -> 0 bytes
 .gitignore                     |   2 ++
 .idea/.gitignore               |   8 --------
 .idea/misc.xml                 |  18 ------------------
 .idea/vcs.xml                  |   6 ------
 source/.DS_Store               | Bin 6148 -> 0 bytes
 source/tools/.DS_Store         | Bin 6148 -> 0 bytes
 source/tools/monitor/.DS_Store | Bin 6148 -> 0 bytes
 8 files changed, 2 insertions(+), 32 deletions(-)
 delete mode 100644 .DS_Store
 delete mode 100644 .idea/.gitignore
 delete mode 100644 .idea/misc.xml
 delete mode 100644 .idea/vcs.xml
 delete mode 100644 source/.DS_Store
 delete mode 100644 source/tools/.DS_Store
 delete mode 100644 source/tools/monitor/.DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index b639f68ef81e47f5d1ca0c3e13497b3d586b058e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHK%}T>S5Z<-5O({YT3Oz1(Em&hwikDF93mDOZN=;1AV9b^zHHT8jSzpK}@p+ut
z-5i1iZz6UEcE8#A+0A^A{b7vp$s#yptj8F$pdoTp)(D!{y0%O(B3E-fu+T5ExgSe^
z%|w6Egx}s`8Ix?mQda-|4}Ts<Sw0+o@~uX5yVbVZR@b_7pJd_YUcSgCUV4M0ODSW&
z(mnqw3X559_e>_a7bRh;5~3i4klX7h31l&mizLWYuB#nZ$Lh>_{pHdgjR(TEPgVo5
z9NWWz7#$t2Rvl|^|KRju_8h+?@=X)TfpaB01}k_6rB>6cn<lYL9>HGaRI!A_05L!e
z5CfaXfH@1S?&g(F6%zx*zz+=I{ve<sItELPYU_Xwug~ajBBFqfZwW+U&@otQ1P=(;
zsen3_n<oa>>0lQo&M{bO)ai_?m0=#Ua`kxOYIU#+70$S$k$Pf)7^pMQ(8C^{|L5?_
ztbOFyQ)omC5Ci{=0p6Ut(+L!1&em_`;aMx7-9tmcxDpi*&})|f7~nqAS5EC0s6(7%
Xu+)gNpk1W{(nUZKLLD*i3k-Y#X)jDR

diff --git a/.gitignore b/.gitignore
index 40a844cf..fa19e01d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@
 *.ko
 *.ko.cmd
 *.idea
+**/.idea/
+**/.DS_Store
 Module.symvers
 modules.builtin
 modules.order
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 13566b81..00000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index 53624c9e..00000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,18 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ExternalStorageConfigurationManager" enabled="true" />
-  <component name="MakefileSettings">
-    <option name="linkedExternalProjectsSettings">
-      <MakefileProjectSettings>
-        <option name="externalProjectPath" value="$PROJECT_DIR$" />
-        <option name="modules">
-          <set>
-            <option value="$PROJECT_DIR$" />
-          </set>
-        </option>
-        <option name="version" value="2" />
-      </MakefileProjectSettings>
-    </option>
-  </component>
-  <component name="MakefileWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
-</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 35eb1ddf..00000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/source/.DS_Store b/source/.DS_Store
deleted file mode 100644
index fe62bc3d37d7e43ee3c765a76137456f136a5618..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHK%}T>S5Z>*NO({YS3Oz1(Em&hwikA@U3mDOZN=-=7V9b^#wTDv3SzpK}@p+ut
z-9U@Mqlld;yWi~m>}Edb{xHV4H;;}Ovl(L+G(?U{g`m08RWreeT+NY%i)<PuGOU>9
zZ<_Gin=EG%ehFLu{*Q2)#97|&fAU(r(cEcSEvsYQ`%iM==RrQtykK^Vqbn(su+)R_
zI-V@X_TITn^B_(qGnEiW69~DxiPK0fJULIJOy&C8VYRLH*zPWu&S2ORj&r)|iRIAg
z_r&1%WVLEr`v-?-m*eN;C6#ZQNDhoE*)>?gJ1A=vz525>k?A8?%ZxITkQg8ahyh|?
zI~g$Nfz{d0@~LWKfEf6J0o)%1G(^{6p;2ue(BbtN{S8DE(D5ySC=9v=3yt6b;W`yi
zr*iYe;5r@b!o;}-3ynIRakVncV^%I7FI=q-cA>%<cQsN^3=jjG4Ak||#`FII{xX%1
z{LK^^5d*})KVyJ5M*hfyqRiR)tvozy1+)ifC>U3u0s{KRB>)Dvk93t&`vvL{=Nc?D
U;w)%a>40<*P=ruN4EzEEUjs->9{>OV

diff --git a/source/tools/.DS_Store b/source/tools/.DS_Store
deleted file mode 100644
index 12272494a9145ab77e6821d51a35aa9e656937e1..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHK%}T>S5Z-O8O({YT3Oz1(Em&hwikDF93mDOZN=-=7V45vSY7V84v%Zi|;`2DO
zy8){?coMNQu=!@^XLsj=><?p%j~2lpV;#nr1r3p-QXy!rbk$5SB3EN%OtQq!)43m2
z4D=UGxb`NS!MbA>vQO)mK6u{zc@(BuzyHo_^~TnA(`s66>&|_Yxtn>}A{}|j4fZaj
zjQmpf{Ht)9PddA2GS0j(o+c_G45kordmYAs%tvw&2dT<+wZm#ztx2c5T-t+SPuTX!
zswb91yWbOoqvO@8W$o=BoL)?xqFF3oG?g4!SF&TUgjZ0$RQ2p8aU|mh@RV6)G$AoS
z3=jjvz-BRE&H<~vS!GkT!~iky0|sz^5YP}EgM~)5b-)L&&**O;qJWNX2}EJgF<58>
z4+z((fI5|%CkEH)U>7FNF<5BS>5QwHVH`7a`FP=KcCZT-&bXtIzQh1Au+BhTcRP6g
zpTjRx`N&^Sp&l_n4E!?&cw_92M^KbGTfdcuXRUyC4-Ez53RFNqU%LcA2ltV#a%#Uo
a9pW5=g+?3&?J6CRE&_@W`Va%Zz`zH${7m8i

diff --git a/source/tools/monitor/.DS_Store b/source/tools/monitor/.DS_Store
deleted file mode 100644
index 54403a1caa441bc5047ab36847f73aef327fef34..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHK%}T>S5Z<+|O({YS3Oz1(Em&hwikDF93mDOZN=-=7V9b^#u}CT8tS{t~_&m<+
zZlJ~BQN+%`?l(I>yO|HNKa4Rx%)>**Y{r-c4UwZ#A!x32ZJA(1uI6|R=IJzuWl%BA
z-!$R3H(1UR7O{-2e*Z@>jiWT{^*;Glt=`yfT1~5M-Fc64?qz<Kr$c{sgQH6+<Dk_2
z;3}HT$M)`-Ofo-8CNq@~g%b$5y^fMl&WAEj!c^sY+F`Y<*4XYW7EXWA6^?VV?25&}
z>2*c_=y<toS$q2jrx)X=_&Jepnn(_eE7>tv!aFD{6}@<~B$mkoSj&tul8_i628aP-
zU^5vo=YiGU%<`#fVt^R<fdSkf1T;j~V4+cM9nj(R8U1xc6wvW4fhY{R1`Cbg0pU6o
zP^WV9#Nav|?83yk1`CZkopH4?%wtwAA1_?34tAl!8Fw{OPYe(P>kQQN(8lxs9DbR~
zNB(*Wjfeqa;GZ$T>mzS8grdyZ`mH=XYX!7>Xebz0paKH=+9d!6xQ}#{Q~L$#5a${!
VG~z61SLuLs5m1CsM-2P|17G=VOEmxh

-- 
Gitee


From fd6570e91da885aecf16722c50f1ab55e5c7a1b1 Mon Sep 17 00:00:00 2001
From: Hailong Liu <liuhailong@linux.alibaba.com>
Date: Tue, 18 Jul 2023 11:14:19 +0800
Subject: [PATCH 06/16] proc_stat: Add cpu_util threshold warning events

Signed-off-by: Hailong Liu <liuhailong@linux.alibaba.com>
---
 .../monitor/unity/collector/proc_stat.lua     | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/source/tools/monitor/unity/collector/proc_stat.lua b/source/tools/monitor/unity/collector/proc_stat.lua
index e5834d75..dbdec60b 100644
--- a/source/tools/monitor/unity/collector/proc_stat.lua
+++ b/source/tools/monitor/unity/collector/proc_stat.lua
@@ -14,6 +14,9 @@ function CprocStat:_init_(proto, pffi, mnt, pFile)
     CvProc._init_(self, proto, pffi, mnt,pFile or "proc/stat")
     self._funs = self:setupTable()
     self._cpuArr = {}
+    self._total_warn = 0
+    self._sys_warn = 0
+    self._user_warn = 0
 end
 
 function CprocStat:_cpuHead()
@@ -23,6 +26,12 @@ end
 
 function CprocStat:_procCpu(now, last)
     if last then
+        local user_thresh = 40   -- percent, compared with user + nice
+        local sys_thresh = 25    -- percent, compared with sys + softirq
+        local total_thresh = 55  -- percent, for the non-idle share
+        local user_util = 0
+        local sys_util = 0
+        local total_util = 0     -- declared here so it is not a global
         local vs = {}
         local sum = 0
         local index = self:_cpuHead()
@@ -37,10 +46,39 @@ function CprocStat:_procCpu(now, last)
             local total = tonumber(sum)
             for i = 1, #vs do
                 local v = tonumber(vs[i])
+
+		--for warn events
+		if index[i] == "user" or index[i] == "nice" then
+			user_util = user_util + v*100.0/total
+		end
+		if index[i] == "sys" or index[i] == "softirq" then
+			sys_util = sys_util + v*100.0/total
+		end
+		if index[i] == "idle" then
+			total_util = 100 - (v*100.0/total)
+		end
+
                 local cell = {name=index[i], value=tonumber(v * 100.0 / total)}
                 table.insert(res, cell)
             end
             table.insert(res, {name="total", value=total})
+            -- Warn events: running counts of samples above each threshold.
+            if user_util > user_thresh then
+                self._user_warn = self._user_warn + 1
+            end
+            local cell0 = {name="usr_warn", value=self._user_warn}
+            table.insert(res, cell0)
+            if sys_util > sys_thresh then
+                self._sys_warn = self._sys_warn + 1
+            end
+            local cell1 = {name="sys_warn", value=self._sys_warn}
+            table.insert(res, cell1)
+            if total_util > total_thresh then
+                self._total_warn = self._total_warn + 1
+            end
+            local cell2 = {name="total_warn", value=self._total_warn}
+            table.insert(res, cell2)
+
             return res
         end
     end
-- 
Gitee


From eb25e8b37ce77e01ef701d8dc1ff60653447026a Mon Sep 17 00:00:00 2001
From: = <wodemia@linux.alibaba.com>
Date: Mon, 10 Jul 2023 11:56:12 +0800
Subject: [PATCH 07/16] imc_latency: init in sysak tools

---
 source/tools/detect/mem/imc_latency/Makefile  |   4 +
 source/tools/detect/mem/imc_latency/README.md |  46 ++
 .../detect/mem/imc_latency/imc_latency.c      | 770 ++++++++++++++++++
 .../detect/mem/imc_latency/imc_latency.h      | 122 +++
 4 files changed, 942 insertions(+)
 create mode 100644 source/tools/detect/mem/imc_latency/Makefile
 create mode 100644 source/tools/detect/mem/imc_latency/README.md
 create mode 100644 source/tools/detect/mem/imc_latency/imc_latency.c
 create mode 100644 source/tools/detect/mem/imc_latency/imc_latency.h

diff --git a/source/tools/detect/mem/imc_latency/Makefile b/source/tools/detect/mem/imc_latency/Makefile
new file mode 100644
index 00000000..282f8bfd
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/Makefile
@@ -0,0 +1,4 @@
+target := imc_latency
+mods := imc_latency.o
+
+include $(SRC)/mk/csrc.mk
diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md
new file mode 100644
index 00000000..8b089785
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/README.md
@@ -0,0 +1,46 @@
+# imc_latency
+
+基于PMU事件的DDR内存访问延迟，用于检查微架构层级是否存在内存竞争。
+
+## 原理与限制
+
+基于IMC的PMU组件实现，需要硬件支持。目前仅支持Intel的Ice Lake（ICX）、Sky Lake（SKX）、Cascade Lake以及Sapphire Rapids(SPR)等架构。
+
+| micro-architecture | code | cpu-model number |
+| ------------------ | ---- | ---------------- |
+| Sapphire Rapids    | SPR  | 143              |
+| Ice Lake           | ICX  | 106/108          |
+| Cascade Lake       |      | 85               |
+| Sky Lake-X         | SKX  | 85               |
+
+可以通过`lscpu`的`Model`字段检查硬件是否支持。
+
+```bash
+Architecture:        x86_64
+CPU op-mode(s):      32-bit, 64-bit
+Byte Order:          Little Endian
+CPU(s):              128
+On-line CPU(s) list: 0-127
+Thread(s) per core:  2
+Core(s) per socket:  32
+Socket(s):           2
+NUMA node(s):        2
+Vendor ID:           GenuineIntel
+BIOS Vendor ID:      Intel(R) Corporation
+CPU family:          6
+Model:               106
+Model name:          Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz
+BIOS Model name:     Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz
+Stepping:            6
+CPU MHz:             3500.000
+CPU max MHz:         3500.0000
+CPU min MHz:         800.0000
+BogoMIPS:            5800.00
+Virtualization:      VT-x
+L1d cache:           48K
+L1i cache:           32K
+L2 cache:            1280K
+L3 cache:            49152K
+NUMA node0 CPU(s):   0-31,64-95
+NUMA node1 CPU(s):   32-63,96-127
+```
diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c
new file mode 100644
index 00000000..e3ef2687
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/imc_latency.c
@@ -0,0 +1,770 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <memory.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/syscall.h>
+#include <linux/types.h>
+
+#include "imc_latency.h"
+
+// #define DEBUG
+
+struct Env {
+    uint32_t max_cpuid;  // cpuinfo.array[0] after CPUID leaf 0 (detect_model)
+    int32_t cpu_model;  // from CPUID leaf 1; folded by is_model_support()
+    int32_t cpu_family;  // from CPUID leaf 1
+    int32_t cpu_stepping;  // from CPUID leaf 1
+    bool vm;  // set when CPUID leaf 1 reports ecx bit 31
+    int64_t nr_cpu;  // sysconf(_SC_NPROCESSORS_CONF)
+    int64_t nr_socket;  // largest physical_package_id seen + 1
+    int64_t nr_core;  // largest core_id seen + 1
+    int64_t nr_channel;  // not assigned in the part of the file shown here
+    int64_t* socket_ref_core;  // per socket: index of an online CPU on it
+} env = {.vm = false};
+
+typedef struct event {
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    uint64_t dram_speed;
+} event;
+
+typedef struct channel_record {
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    double read_latency;
+    double write_latency;
+} channel_record;
+
+typedef struct socket_record {
+    channel_record* channel_record_arr;
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    double read_latency;
+    double write_latency;
+    uint64_t dram_clock;
+} socket_record;
+
+typedef struct record {
+    socket_record* socket_record_arr;
+} record;
+
+record before, after;
+
+time_t before_ts = 0, after_ts = 0;
+imc_pmu* pmus = 0;
+
+// Read one integer from `path`; returns -1 if it cannot be opened or parsed.
+int64_t read_sys_file(char* path, bool slient) {
+    long long val = -1;
+    FILE* fp = fopen(path, "r");
+    if (!fp) {
+        if (!slient) fprintf(stderr, "Failed open sys-file: %s\n", path);
+        return -1;
+    }
+    if (fscanf(fp, "%lld", &val) != 1) val = -1;
+#ifdef DEBUG
+    fprintf(stderr, "read from=%s val=%lld\n", path, val);
+#endif
+    fclose(fp);
+    return (int64_t)val;
+}
+
+// (Re)program `ev`: close any previous perf handle and open a new one using
+// config 0xff for a fixed counter, else `val`. Returns 0, or -1 on error.
+static int write_reg(imc_event* ev, uint64_t val) {
+    int saved_errno;
+    if (ev->fd >= 0) close(ev->fd);
+
+    ev->attr.config = ev->fixed ? 0xff : val;
+
+    // perf_event_open() returns -1 on failure; 0 is a legal descriptor.
+    ev->fd = syscall(SYS_perf_event_open, &ev->attr, -1, ev->core_id, -1, 0);
+    if (ev->fd < 0) {
+        saved_errno = errno;  // fprintf() below may overwrite errno
+        fprintf(stderr, "Linux Perf: Error on programming PMU %d:%s\n",
+                ev->pmu_id, strerror(saved_errno));
+        fprintf(stderr, "config: 0x%llx config1: 0x%llx config2: 0x%llx\n",
+                ev->attr.config, ev->attr.config1, ev->attr.config2);
+        if (saved_errno == EMFILE) fprintf(stderr, "%s", ULIMIT_RECOMMENDATION);
+        return -1;
+    }
+    return 0;
+}
+
+// Read the current 64-bit count from the event's perf handle.
+// Returns 0 when the event has no open handle; a short read is only logged.
+static uint64_t read_reg(imc_event* ev) {
+    uint64_t count = 0;
+    if (ev->fd < 0) return count;
+
+    if (read(ev->fd, &count, sizeof(count)) != (ssize_t)sizeof(count)) {
+        fprintf(stderr,
+                "PCM Error: failed to read from Linux perf handle %d PMU %d\n",
+                ev->fd, ev->pmu_id);
+    }
+    return count;
+}
+
+// Report whether logical CPU `cpu_id` is online according to
+// /sys/devices/system/cpu/cpu<N>/online.
+// Returns true only when the file can be read and holds 1. A CPU whose
+// `online` file is missing or unparsable is reported as offline.
+static bool is_cpu_online(int cpu_id) {
+    char path[BUF_SIZE];
+    int online = 0;
+    bool res = false;
+    FILE* fp;
+
+    snprintf(path, BUF_SIZE, "/sys/devices/system/cpu/cpu%d/online", cpu_id);
+
+    fp = fopen(path, "r");
+    if (!fp) {
+        fprintf(stderr, "Failed open %s.\n", path);
+        return res;
+    }
+
+    // The file holds 1 for an online CPU and 0 for an offline one; the value
+    // must be checked, otherwise offline CPUs are reported as online.
+    if (fscanf(fp, "%d", &online) == 1 && online == 1) res = true;
+    fclose(fp);
+    return res;
+}
+
+// Return the core_id sysfs reports for logical CPU `cpu_id` (via read_sys_file).
+int64_t read_core_id(int cpu_id) {
+    char sysfs_path[BUF_SIZE];
+
+    snprintf(sysfs_path, BUF_SIZE,
+             "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu_id);
+
+    return read_sys_file(sysfs_path, true);
+}
+
+// Return the physical_package_id (socket) sysfs reports for logical CPU
+// `cpu_id`; the value comes straight from read_sys_file().
+int64_t read_physical_package_id(int cpu_id) {
+    char sysfs_path[BUF_SIZE];
+    int64_t package_id;
+
+    snprintf(sysfs_path, BUF_SIZE,
+             "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+             cpu_id);
+    package_id = read_sys_file(sysfs_path, true);
+    return package_id;
+}
+
+// Fill `ent` with the core and socket ids of logical CPU `id`.
+// Returns 0 on success, or -1 when either id reads back as -1.
+static int get_topology(int id, struct topology_ent* ent) {
+    ent->core_id = read_core_id(id);
+    ent->socket_id = read_physical_package_id(id);
+
+    if (ent->core_id != -1 && ent->socket_id != -1) return 0;
+
+#ifdef DEBUG
+    fprintf(stderr, "get coreid=%d socket_id=%d\n", ent->core_id,
+            ent->socket_id);
+#endif
+    return -1;
+}
+
+// Discover the CPU topology from sysfs and store it in `env`: nr_cpu,
+// nr_socket, nr_core and one online CPU per socket. Returns 0, or -1 on error.
+static int discovery_topology() {
+    int err = 0, i = 0;
+    struct topology_ent* topo = 0;
+
+    env.nr_cpu = sysconf(_SC_NPROCESSORS_CONF);
+    if (env.nr_cpu < 0) {
+        fprintf(stderr, "Failed get nr_cpu.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    topo = calloc(env.nr_cpu, sizeof(struct topology_ent));
+    if (!topo) {
+        fprintf(stderr, "Failed calloc topology memory.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    int64_t max_skt_id = 0;
+    int64_t max_core_id = 0;
+    for (i = 0; i < env.nr_cpu; i++) {
+        err = get_topology(i, topo + i);
+        if (err) {
+            fprintf(stderr, "Failed get topology cpuid:%d\n", i);
+            goto cleanup;
+        }
+
+        if (topo[i].socket_id > max_skt_id) max_skt_id = topo[i].socket_id;
+        if (topo[i].core_id > max_core_id) max_core_id = topo[i].core_id;
+    }
+
+    env.nr_socket = max_skt_id + 1;
+    env.nr_core = max_core_id + 1;
+
+    env.socket_ref_core = calloc(env.nr_socket, sizeof(int64_t));
+    if (!env.socket_ref_core) {
+        // nr_socket is int64_t, so print it through long long rather than %d.
+        fprintf(stderr, "Failed calloc socket_ref_core. nr_socket=%lld\n",
+                (long long)env.nr_socket);
+        err = -1;
+        goto cleanup;
+    }
+
+    // The last online CPU found on each socket becomes its reference CPU.
+    for (i = 0; i < env.nr_cpu; i++) {
+        if (!is_cpu_online(i)) continue;
+        env.socket_ref_core[topo[i].socket_id] = i;
+    }
+
+cleanup:
+    if (topo) free(topo);
+    return err;
+}
+
+static void cpuid_1(int leaf, CPUID_INFO* info) {  // CPUID with eax = leaf
+    __asm__ __volatile__("cpuid"
+                         : "=a"(info->reg.eax), "=b"(info->reg.ebx),
+                           "=c"(info->reg.ecx), "=d"(info->reg.edx)
+                         : "a"(leaf));  // ecx (sub-leaf) is not set here
+}
+
+void cpuid_2(const unsigned leaf, const unsigned subleaf, CPUID_INFO* info) {
+    __asm__ __volatile__("cpuid"  // outputs: eax..edx stored into info->reg
+                         : "=a"(info->reg.eax), "=b"(info->reg.ebx),
+                           "=c"(info->reg.ecx), "=d"(info->reg.edx)
+                         : "a"(leaf), "c"(subleaf));  // eax = leaf, ecx = subleaf
+}
+
+// Identify the CPU via CPUID and fill env.max_cpuid/cpu_family/cpu_model/
+// cpu_stepping/vm. Returns false for a non-Intel vendor or family != 6.
+static bool detect_model() {
+    union {
+        char cbuf[16];
+        int ibuf[16 / sizeof(int)];
+    } vendor;
+    CPUID_INFO cpuinfo;
+
+    bzero(vendor.cbuf, 16);
+    cpuid_1(0, &cpuinfo);
+    // Vendor string order is ebx, edx, ecx (assumes array[] = eax..edx).
+    vendor.ibuf[0] = cpuinfo.array[1];
+    vendor.ibuf[1] = cpuinfo.array[3];
+    vendor.ibuf[2] = cpuinfo.array[2];
+
+    if (strncmp(vendor.cbuf, "GenuineIntel", 4 * 3) != 0) {
+        fprintf(stderr, "Not intel cpu.\n");
+        return false;
+    }
+
+    env.max_cpuid = cpuinfo.array[0];
+
+    // Leaf 1 eax: stepping [3:0], model [7:4], family [11:8], ext. model [19:16].
+    cpuid_1(1, &cpuinfo);
+    env.cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) |
+                     ((cpuinfo.array[0] & 0xf00000) >> 16);
+    env.cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) |
+                    ((cpuinfo.array[0] & 0xf0000) >> 12);
+    env.cpu_stepping = cpuinfo.array[0] & 0x0f;
+
+    // Leaf 1 ecx bit 31 is the hypervisor-present flag.
+    if (cpuinfo.reg.ecx & (1UL << 31UL)) {
+        env.vm = true;
+        fprintf(stderr,
+                "WARN: Detected a hypervisor/virtualization technology. Some "
+                "metrics might not be available due to configuration or "
+                "availability of virtual hardware features.\n");
+    }
+
+    if (env.cpu_family != 6) {
+        fprintf(stderr, "Unsupport CPU Family: %d\n", env.cpu_family);
+        return false;
+    }
+    return true;
+}
+
+bool is_model_support() {  // rewrites env.cpu_model in place, then tests it
+    switch (env.cpu_model) {
+        case NEHALEM:
+            env.cpu_model = NEHALEM_EP;
+            break;
+        case ATOM_2:
+            env.cpu_model = ATOM;
+            break;
+        case HASWELL_ULT:
+        case HASWELL_2:
+            env.cpu_model = HASWELL;
+            break;
+        case BROADWELL_XEON_E3:
+            env.cpu_model = BROADWELL;
+            break;
+        case ICX_D:
+            env.cpu_model = ICX;
+            break;
+        case CML_1:
+            env.cpu_model = CML;
+            break;
+        case ICL_1:
+            env.cpu_model = ICL;
+            break;
+        case TGL_1:
+            env.cpu_model = TGL;
+            break;
+        case ADL_1:
+            env.cpu_model = ADL;
+            break;
+        case RPL_1:
+        case RPL_2:
+        case RPL_3:
+            env.cpu_model = RPL;
+            break;
+    }
+    // After the folding above, only ICX, SPR and SKX count as supported.
+    return (env.cpu_model == ICX || env.cpu_model == SPR ||
+            env.cpu_model == SKX);
+}
+
+uint32_t* get_ddr_latency_metric_config() {  // 4 calloc'ed configs, or NULL
+    uint32_t* cfgs = 0;
+    cfgs = calloc(4, sizeof(uint32_t));
+    if (!cfgs) {
+        fprintf(stderr, "Failed calloc cfgs memory.\n");
+        return NULL;
+    }
+    // Slot order: RPQ occupancy, RPQ inserts, WPQ occupancy, WPQ inserts.
+    if (ICX == env.cpu_model || SPR == env.cpu_model) {
+        cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ occupancy pch 0
+        cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) +
+                  MC_CH_PCI_PMON_CTL_UMASK(1);  // DRAM RPQ Insert.pch 0
+        cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x82) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Occupancy pch 0
+        cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) +
+                  MC_CH_PCI_PMON_CTL_UMASK(1);  // DRAM WPQ Insert.pch 0
+    } else {
+        cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ occupancy
+        cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM RPQ Insert
+        cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Occupancy
+        cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) +
+                  MC_CH_PCI_PMON_CTL_UMASK(0);  // DRAM WPQ Insert
+    }
+
+    return cfgs;
+}
+
+struct perf_event_attr init_perf_event_attr(bool group) {
+    struct perf_event_attr e;
+    bzero(&e, sizeof(struct perf_event_attr));
+    e.type = -1;  // must be set up later
+    e.size = sizeof(e);
+    e.config = -1;  // must be set up later
+    e.read_format = group ? PERF_FORMAT_GROUP
+                          : 0; /* PERF_FORMAT_TOTAL_TIME_ENABLED |
+      PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_GROUP ; */
+    return e;
+}
+
+void init_imc_event(imc_event* event, int pmu_id, int core_id, bool fixed) {
+    struct perf_event_attr attr = init_perf_event_attr(false);
+    attr.type = pmu_id;
+    event->attr = attr;
+    event->fixed = fixed;
+    event->pmu_id = pmu_id;
+    event->core_id = core_id;
+    event->fd = -1;
+}
+
+void init_imc_reggrp(imc_reg_group* grp, int socket_id, int pmu_id) {
+    int i = 0;
+#ifdef DEBUG
+
+    fprintf(stderr, "Init imc reg group: socketid=%d pmuid=%d\n", socket_id,
+            pmu_id);
+#endif
+    init_imc_event(&grp->fixed_ev, pmu_id, env.socket_ref_core[socket_id],
+                   true);
+
+    for (i = 0; i < GENERAL_REG_NUM; i++) {
+        init_imc_event(&grp->general_ev[i], pmu_id,
+                       env.socket_ref_core[socket_id], false);
+    }
+}
+
+imc_pmu* init_imc_pmus(int64_t* pmu_ids, int64_t size) {
+    int skt_id = 0;
+    int pmu_id = 0;
+
+    imc_pmu* pmus = calloc(env.nr_socket, sizeof(imc_pmu));
+
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        pmus[skt_id].reg_groups = calloc(size, sizeof(imc_reg_group));
+        pmus[skt_id].socket_id = skt_id;
+        pmus[skt_id].nr_grp = size;
+
+        for (pmu_id = 0; pmu_id < size; pmu_id++) {
+            init_imc_reggrp(&pmus[skt_id].reg_groups[pmu_id], skt_id,
+                            pmu_ids[pmu_id]);
+        }
+    }
+
+    return pmus;
+}
+
+void program_imc(uint32_t* cfgs, imc_pmu* pmus) {
+    int skt_id = 0;
+    int pmu_id = 0;
+    int idx = 0;
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        imc_pmu* pmu = pmus + skt_id;
+        for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) {
+            imc_reg_group* grp = pmu->reg_groups + pmu_id;
+            /* enabel and reset fixed counter(DRAM clock) */
+            write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN);
+            write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN +
+                                          MC_CH_PCI_PMON_FIXED_CTL_RST);
+            for (idx = 0; idx < GENERAL_REG_NUM; idx++) {
+                uint64_t event = cfgs[idx];
+                if (SPR == env.cpu_model) {
+                    write_reg(&grp->general_ev[idx], event);
+                } else {
+                    write_reg(&grp->general_ev[idx], MC_CH_PCI_PMON_CTL_EN);
+                    write_reg(&grp->general_ev[idx],
+                              MC_CH_PCI_PMON_CTL_EN | event);
+                }
+            }
+        }
+    }
+}
+
+socket_record* alloc_socket_record() {
+    int skt_id = 0;
+    socket_record* rec = calloc(env.nr_socket, sizeof(socket_record));
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        rec[skt_id].channel_record_arr =
+            calloc(env.nr_channel, sizeof(channel_record));
+    }
+    return rec;
+}
+
+void free_socket_record(socket_record* rec) {
+    int skt_id = 0;
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        free(rec[skt_id].channel_record_arr);
+    }
+    free(rec);
+}
+
+void init_data() {
+    before.socket_record_arr = alloc_socket_record();
+    after.socket_record_arr = alloc_socket_record();
+}
+
+void free_data() {
+    free_socket_record(before.socket_record_arr);
+    free_socket_record(after.socket_record_arr);
+}
+
+int64_t get_perf_pmuid(int num) {
+    int64_t id = -1;
+    char imc_path[BUF_SIZE];
+
+    if (num != -1) {
+        snprintf(imc_path, BUF_SIZE,
+                 "/sys/bus/event_source/devices/uncore_imc_%d/type", num);
+    } else {
+        snprintf(imc_path, BUF_SIZE,
+                 "/sys/bus/event_source/devices/uncore_imc/type");
+    }
+
+    id = read_sys_file(imc_path, true);
+
+    return id;
+}
+
+/* Probe uncore_imc (index -1) and uncore_imc_0..MAX_IMC_ID for perf PMU type
+ * ids; store the hit count in env.nr_channel. Returns a calloc'ed array the
+ * caller must free, or NULL when nothing was found or allocation failed. */
+static int64_t* enumerate_imc_PMUs() {
+    int64_t* pmu_ids = 0;
+    int idx = 0, i = 0;
+
+    /* The probe loop below runs MAX_IMC_ID + 2 times (-1 .. MAX_IMC_ID). */
+    pmu_ids = calloc(MAX_IMC_ID + 2, sizeof(int64_t));
+    if (!pmu_ids) {
+        fprintf(stderr, "Failed calloc pmu ids memory.\n");
+        return NULL;
+    }
+
+    for (i = -1; i <= MAX_IMC_ID; ++i) {
+        int64_t pmu_id = get_perf_pmuid(i);
+        if (pmu_id != -1) pmu_ids[idx++] = pmu_id;
+    }
+
+    env.nr_channel = idx;
+    if (env.nr_channel == 0) {
+        free(pmu_ids);
+        pmu_ids = 0;
+    }
+
+    return pmu_ids;
+}
+
+static int init_env() {
+    int err = 0;
+    int64_t* pmu_ids = 0;
+    uint32_t* cfgs = 0;
+
+    // check model
+    if (!detect_model()) {
+        fprintf(stderr, "Failed detect model.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    if (!is_model_support()) {
+        fprintf(stderr, "Unsupport model.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // get core/socket info
+    err = discovery_topology();
+    if (err) {
+        fprintf(stderr, "Failed discovery topology.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // get all imc-pmu id
+    pmu_ids = enumerate_imc_PMUs();
+    if (!pmu_ids) {
+        fprintf(stderr, "Failed enumerate imc pmus.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    cfgs = get_ddr_latency_metric_config();
+    if (!cfgs) {
+        fprintf(stderr, "Failed enumerate imc pmus.\n");
+        err = -1;
+        goto cleanup;
+    }
+
+    // init pmu
+    pmus = init_imc_pmus(pmu_ids, env.nr_channel);
+
+    // write pmu register
+    program_imc(cfgs, pmus);
+
+    // init data
+    init_data();
+
+    fprintf(stderr, "nr_socket=%d nr_core=%d nr_cpu=%d nr_channel=%d \n",
+            env.nr_socket, env.nr_core, env.nr_cpu, env.nr_channel);
+    int i = 0;
+    for (i = 0; i < env.nr_socket; i++) {
+        fprintf(stderr, "socket%d-ref cpu=%d\n", i, env.socket_ref_core[i]);
+    }
+
+cleanup:
+
+    if (pmu_ids) {
+        free(pmu_ids);
+        pmu_ids = 0;
+    }
+
+    if (cfgs) {
+        free(cfgs);
+        cfgs = 0;
+    }
+
+    return err;
+}
+
+/* Convert one queue's counter deltas into a latency in nanoseconds.
+ * (occ_now - occ_prev) / (ins_now - ins_prev) is the occupancy per insert in
+ * DRAM clocks; dividing by clk_per_ns (DRAM clocks per nanosecond) yields ns.
+ * *lat_ns is left untouched when no insert was counted, when the insert
+ * counter went backwards, or when clk_per_ns is not a positive number. */
+static void imc_queue_latency(uint64_t occ_now, uint64_t occ_prev,
+                              uint64_t ins_now, uint64_t ins_prev,
+                              double clk_per_ns, double* lat_ns) {
+    if (ins_now <= ins_prev || !(clk_per_ns > 0)) return;
+    /* Divide as doubles: unsigned integer division drops the fraction. */
+    *lat_ns = (double)(occ_now - occ_prev) / (double)(ins_now - ins_prev) /
+              clk_per_ns;
+}
+
+/* Sample every IMC register group of every socket into `after`, then, once
+ * an earlier sample exists (before_ts != 0), fill in the read/write latency
+ * of each channel and of each socket from the before/after counter deltas.
+ * Stamps after_ts with the sample time; relies on the globals pmus, env,
+ * before and after having been set up beforehand. */
+void read_imc() {
+    int skt_id = 0, pmu_id = 0;
+    after_ts = time(0);
+
+    /* Pass 1: read the raw counters of every channel. */
+    for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+        imc_pmu* pmu = pmus + skt_id;
+        socket_record* socket_ev = &after.socket_record_arr[skt_id];
+        for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) {
+            imc_reg_group* grp = pmu->reg_groups + pmu_id;
+            channel_record* channel_ev = &socket_ev->channel_record_arr[pmu_id];
+            /* The socket's DRAM clock comes from the first group's fixed
+             * counter; on ICX and SNOWRIDGE the raw count is doubled. */
+            if (pmu_id == 0) {
+                socket_ev->dram_clock = read_reg(&grp->fixed_ev);
+                if (env.cpu_model == ICX || env.cpu_model == SNOWRIDGE) {
+                    socket_ev->dram_clock = 2 * socket_ev->dram_clock;
+                }
+            }
+
+            channel_ev->rpq_occ = read_reg(&grp->general_ev[RPQ_OCC]);
+            channel_ev->rpq_ins = read_reg(&grp->general_ev[RPQ_INS]);
+            channel_ev->wpq_occ = read_reg(&grp->general_ev[WPQ_OCC]);
+            channel_ev->wpq_ins = read_reg(&grp->general_ev[WPQ_INS]);
+
+            /* Socket totals are the sums over the socket's channels. */
+            socket_ev->rpq_occ += channel_ev->rpq_occ;
+            socket_ev->rpq_ins += channel_ev->rpq_ins;
+            socket_ev->wpq_occ += channel_ev->wpq_occ;
+            socket_ev->wpq_ins += channel_ev->wpq_ins;
+        }
+    }
+
+    /* Pass 2: latencies need two samples; the first call only records. */
+    if (before_ts) {
+        double delta = after_ts - before_ts;
+        for (skt_id = 0; skt_id < env.nr_socket; skt_id++) {
+            socket_record* prev = &before.socket_record_arr[skt_id];
+            socket_record* cur = &after.socket_record_arr[skt_id];
+            imc_pmu* pmu = pmus + skt_id;
+            /* DRAM clocks per nanosecond over the sampling interval. */
+            double dram_speed =
+                (cur->dram_clock - prev->dram_clock) / (delta * (double)1e9);
+
+            for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) {
+                channel_record* cprev = &prev->channel_record_arr[pmu_id];
+                channel_record* ccur = &cur->channel_record_arr[pmu_id];
+
+                /* Per-channel read (RPQ) and write (WPQ) latency. */
+                imc_queue_latency(ccur->rpq_occ, cprev->rpq_occ, ccur->rpq_ins,
+                                  cprev->rpq_ins, dram_speed,
+                                  &ccur->read_latency);
+                imc_queue_latency(ccur->wpq_occ, cprev->wpq_occ, ccur->wpq_ins,
+                                  cprev->wpq_ins, dram_speed,
+                                  &ccur->write_latency);
+            }
+
+            /* Socket-level latency from the summed channel counters. */
+            imc_queue_latency(cur->rpq_occ, prev->rpq_occ, cur->rpq_ins,
+                              prev->rpq_ins, dram_speed, &cur->read_latency);
+            imc_queue_latency(cur->wpq_occ, prev->wpq_occ, cur->wpq_ins,
+                              prev->wpq_ins, dram_speed, &cur->write_latency);
+        }
+    }
+}
+
+#ifdef DEBUG
+void print_socket(socket_record* rec) {
+    fprintf(stderr,
+            "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld dram_clocks=%ld "
+            "r_latency=%lf w_latency=%lf\n",
+            rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins,
+            rec->dram_clock, rec->read_latency, rec->write_latency);
+}
+
+void print_channel(channel_record* rec) {
+    fprintf(stderr,
+            "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld r_latency = % lf "
+            "w_latency = % lf\n ",
+            rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins,
+            rec->read_latency, rec->write_latency);
+}
+
+void print_record(record* rec) {
+    int i = 0;
+    int j = 0;
+    for (i = 0; i < env.nr_socket; i++) {
+        print_socket(&rec->socket_record_arr[i]);
+        for (j = 0; j < env.nr_channel; j++) {
+            print_channel(&rec->socket_record_arr[i].channel_record_arr[j]);
+        }
+    }
+}
+#endif
+
+static int collect_data() {
+    int32_t socket_id = 0, channel_id = 0, line_num = 0;
+    read_imc();
+
+    fprintf(stderr, "[SOCKET_LEVEL]\n");
+    fprintf(stderr, "%16s %16s %16s\n", "socket", "rlat", "wlat");
+
+    for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
+        char socket_name[32];
+        snprintf(socket_name, 32, "%d", socket_id);
+        socket_record* srec = &after.socket_record_arr[socket_id];
+        fprintf(stderr, "%16s %16lf %16lf\n", socket_name, srec->read_latency,
+                srec->write_latency);
+    }
+
+    for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
+        fprintf(stderr, "[CHANNEL_LEVEL-SOCKET%d]\n", socket_id);
+        char socket_name[32];
+        snprintf(socket_name, 32, "%d", socket_id);
+
+        socket_record* srec = &after.socket_record_arr[socket_id];
+
+        fprintf(stderr, "%16s %16s %16s\n", "channel", "rlat", "wlat");
+        for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
+            channel_record* crec = &srec->channel_record_arr[channel_id];
+            char channel_name[32];
+            snprintf(channel_name, 32, "%d", channel_id);
+            fprintf(stderr, "%16s %16s %16s\n", channel_name,
+                    crec->read_latency, crec->write_latency);
+        }
+    }
+
+    /* swap data */
+    socket_record* tmp = before.socket_record_arr;
+    before.socket_record_arr = after.socket_record_arr;
+    after.socket_record_arr = tmp;
+
+    /* clear after data */
+    free_socket_record(after.socket_record_arr);
+    after.socket_record_arr = alloc_socket_record();
+
+    /* reset before timestamp */
+    before_ts = after_ts;
+
+    return 0;
+}
+
+static clean_env(void) { free_data(); }
+
+int main() {
+    init_env();
+    while (1) {
+        sleep(1);
+        collect_data();
+    }
+
+    clean_env();
+}
diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h
new file mode 100644
index 00000000..6c8ccde5
--- /dev/null
+++ b/source/tools/detect/mem/imc_latency/imc_latency.h
@@ -0,0 +1,122 @@
+#ifndef UNITY_SAMPLE_H
+#define UNITY_SAMPLE_H
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include <linux/perf_event.h>
+#include <stdint.h>
+
+#define ULIMIT_RECOMMENDATION                                                 \
+    ("try executing 'ulimit -n 1000000' to increase the limit on the number " \
+     "of open files.\n")
+
+typedef union CPUID_INFO {
+    int array[4];
+    struct {
+        unsigned int eax, ebx, ecx, edx;
+    } reg;
+} CPUID_INFO;
+
+enum INTEL_CPU_MODEL {
+    NEHALEM_EP = 26,
+    NEHALEM = 30,
+    ATOM = 28,
+    ATOM_2 = 53,
+    CENTERTON = 54,
+    BAYTRAIL = 55,
+    AVOTON = 77,
+    CHERRYTRAIL = 76,
+    APOLLO_LAKE = 92,
+    GEMINI_LAKE = 122,
+    DENVERTON = 95,
+    SNOWRIDGE = 134,
+    CLARKDALE = 37,
+    WESTMERE_EP = 44,
+    NEHALEM_EX = 46,
+    WESTMERE_EX = 47,
+    SANDY_BRIDGE = 42,
+    JAKETOWN = 45,
+    IVY_BRIDGE = 58,
+    HASWELL = 60,
+    HASWELL_ULT = 69,
+    HASWELL_2 = 70,
+    IVYTOWN = 62,
+    HASWELLX = 63,
+    BROADWELL = 61,
+    BROADWELL_XEON_E3 = 71,
+    BDX_DE = 86,
+    SKL_UY = 78,
+    KBL = 158,
+    KBL_1 = 142,
+    CML = 166,
+    CML_1 = 165,
+    ICL = 126,
+    ICL_1 = 125,
+    RKL = 167,
+    TGL = 140,
+    TGL_1 = 141,
+    ADL = 151,
+    ADL_1 = 154,
+    RPL = 0xb7,
+    RPL_1 = 0xba,
+    RPL_2 = 0xbf,
+    RPL_3 = 0xbe,
+    BDX = 79,
+    KNL = 87,
+    SKL = 94,
+    SKX = 85,
+    ICX_D = 108,
+    ICX = 106,
+    SPR = 143,
+    END_OF_MODEL_LIST = 0x0ffff
+};
+
+#define MC_CH_PCI_PMON_CTL_EVENT(x) (x << 0)
+#define MC_CH_PCI_PMON_CTL_UMASK(x) (x << 8)
+#define MC_CH_PCI_PMON_CTL_RST (1 << 17)
+#define MC_CH_PCI_PMON_CTL_EDGE_DET (1 << 18)
+#define MC_CH_PCI_PMON_CTL_EN (1 << 22)
+#define MC_CH_PCI_PMON_CTL_INVERT (1 << 23)
+#define MC_CH_PCI_PMON_CTL_THRESH(x) (x << 24UL)
+#define MC_CH_PCI_PMON_FIXED_CTL_RST (1 << 19)
+#define MC_CH_PCI_PMON_FIXED_CTL_EN (1 << 22)
+#define UNC_PMON_UNIT_CTL_FRZ_EN (1 << 16)
+#define UNC_PMON_UNIT_CTL_RSV ((1 << 16) + (1 << 17))
+
+#define RPQ_OCC 0
+#define RPQ_INS 1
+#define WPQ_OCC 2
+#define WPQ_INS 3
+
+#define BUF_SIZE 1024
+#define MAX_IMC_ID 100
+#define GENERAL_REG_NUM 4
+#define FIXED_REG_NUM 1
+
+typedef struct imc_event_t {
+    struct perf_event_attr attr;
+    int fd;
+    int core_id;
+    int pmu_id;
+    bool fixed;
+} imc_event;
+
+typedef struct imc_reg_group_t {
+    imc_event general_ev[GENERAL_REG_NUM];
+    imc_event fixed_ev;
+    int pmu_id;
+} imc_reg_group;
+
+typedef struct imc_pmu_t {
+    imc_reg_group* reg_groups;
+    int socket_id;
+    int nr_grp;
+} imc_pmu;
+
+struct topology_ent {
+    int64_t cpu_id;
+    int64_t core_id;
+    int64_t socket_id;
+};
+
+#endif  // UNITY_SAMPLE_H
-- 
Gitee


From 21bfa8ad77496c27ff072749e75555b01659c353 Mon Sep 17 00:00:00 2001
From: ZouTao <wodemia@linux.alibaba.com>
Date: Mon, 10 Jul 2023 15:18:53 +0800
Subject: [PATCH 08/16] imc_latency: add options

---
 source/tools/detect/mem/imc_latency/README.md |  48 ++-
 .../detect/mem/imc_latency/imc_latency.c      | 275 ++++++++++++++----
 .../detect/mem/imc_latency/imc_latency.h      |  34 +++
 3 files changed, 294 insertions(+), 63 deletions(-)

diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md
index 8b089785..fd669fb1 100644
--- a/source/tools/detect/mem/imc_latency/README.md
+++ b/source/tools/detect/mem/imc_latency/README.md
@@ -2,16 +2,52 @@
 
 基于PMU事件的DDR内存访问延迟，用于检查微架构层级是否存在内存竞争。
 
+## Usage
+
+### 使用用例
+
+```bash
+Sample:
+
+imc_latency -f /dev/stdout #输出日子到控制台
+imc_latency -d 15 i 20  # 每15秒采集一次 输出20次采集结果
+```
+
+### 结果说明
+
+一次的采集结果如下，输出的指标类型有read_latency(rlat)和write_latency(wlat)，指标的level有socket和channel两种级别。
+
+- SOCKET_LEVEL： socket层级的读写内存延迟，通过对channel级的指标求平均得到。
+- CHANNEL_LEVEL：channel级别的读写内存延迟
+  
+```bash
+[TIME-STAMP] 2023-07-10 07:06:17
+[SOCKET_LEVEL]
+               0       1
+    rlat   13.75   14.37
+    wlat   39.37   37.49
+[CHANNEL_LEVEL]-[SOCKET-0]
+               0       1       2       3       4       5       6       7       8       9      10      11
+    rlat   14.37   13.75    0.00   13.75   13.75    0.00   13.12   13.75    0.00   14.37   13.75    0.00
+    wlat   40.62   39.99    0.00   39.37   38.74    0.00   40.62   39.37    0.00   39.99   38.74    0.00
+[CHANNEL_LEVEL]-[SOCKET-1]
+               0       1       2       3       4       5       6       7       8       9      10      11
+    rlat   15.00   13.75    0.00   13.75   13.75    0.00   13.75   14.37    0.00   14.37   14.37    0.00
+    wlat   38.12   37.49    0.00   36.87   36.87    0.00   38.12   38.12    0.00   38.12   37.49    0.00
+```
+
 ## 原理与限制
 
 基于IMC的PMU组件实现，需要硬件支持。目前仅支持Intel的Ice Lake（ICX）、Sky Lake（SKX）、Cascade Lake以及Sapphire Rapids(SPR)等架构。
 
-| micro-architecture | code | cpu-model number |
-| ------------------ | ---- | ---------------- |
-| Sapphire Rapids    | SPR  | 143              |
-| Ice Lake           | ICX  | 106/108          |
-| Cascade Lake       |      | 106              |
-| Sky Lake-X         | SKX  | 85               |
+| 微架构          | 代号 | cpu-model编号 |
+| --------------- | ---- | ------------- |
+| Sapphire Rapids | SPR  | 143           |
+| Ice Lake        | ICX  | 106/108       |
+| Cascade Lake    |      | 85            |
+| Sky Lake-X      | SKX  | 85            |
+
+### 检查是否支持
 
 可以通过`lscpu`的`Model`字段检查硬件是否支持。
 
diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c
index e3ef2687..204d1343 100644
--- a/source/tools/detect/mem/imc_latency/imc_latency.c
+++ b/source/tools/detect/mem/imc_latency/imc_latency.c
@@ -1,11 +1,15 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
-#include <unistd.h>
 #include <stdbool.h>
+#include <strings.h>
+#include <signal.h>
+#include <unistd.h>
 #include <memory.h>
 #include <errno.h>
-#include <strings.h>
+#include <argp.h>
+#include <time.h>
+#include <sys/stat.h>
 #include <sys/syscall.h>
 #include <linux/types.h>
 
@@ -13,6 +17,27 @@
 
 // #define DEBUG
 
+const char* argp_program_version = "imc_latency 0.1";
+const char argp_program_doc[] =
+    "Detect the memory latency based on IMC PMU.\n"
+    "\n"
+
+    "USAGE: imc_latency [--help] [-d DELAY] [-i ITERATION] [-f LOGFILE]\n"
+    "\n"
+
+    "EXAMPLES:\n"
+    "    imc_latency            # run forever, display the memory latency.\n"
+    "    imc_latency -f foo.log   # log to foo.log.\n";
+
+static const struct argp_option opts[] = {
+    {"delay", 'd', "DELAY", 0, "Sample peroid, default is 3 seconds"},
+    {"iter", 'i', "ITERATION", 0, "Output times, default run forever"},
+    {"logfile", 'f', "LOGFILE", 0,
+     "Logfile for result, default /var/log/sysak/imc_latency/imc_latency.log"},
+    {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+    {},
+};
+
 struct Env {
     uint32_t max_cpuid;
     int32_t cpu_model;
@@ -24,44 +49,99 @@ struct Env {
     int64_t nr_core;
     int64_t nr_channel;
     int64_t* socket_ref_core;
-} env = {.vm = false};
-
-typedef struct event {
-    uint64_t rpq_occ;
-    uint64_t rpq_ins;
-    uint64_t wpq_occ;
-    uint64_t wpq_ins;
-    uint64_t dram_speed;
-} event;
-
-typedef struct channel_record {
-    uint64_t rpq_occ;
-    uint64_t rpq_ins;
-    uint64_t wpq_occ;
-    uint64_t wpq_ins;
-    double read_latency;
-    double write_latency;
-} channel_record;
-
-typedef struct socket_record {
-    channel_record* channel_record_arr;
-    uint64_t rpq_occ;
-    uint64_t rpq_ins;
-    uint64_t wpq_occ;
-    uint64_t wpq_ins;
-    double read_latency;
-    double write_latency;
-    uint64_t dram_clock;
-} socket_record;
-
-typedef struct record {
-    socket_record* socket_record_arr;
-} record;
+    int64_t nr_iter;
+    int64_t delay;
+} env = {.vm = false, .nr_iter = INT64_MAX, .delay = DEFAUlT_PEROID};
 
 record before, after;
-
 time_t before_ts = 0, after_ts = 0;
 imc_pmu* pmus = 0;
+char log_dir[FILE_PATH_LEN] = "/var/log/sysak/imc_latency";
+char default_log_path[FILE_PATH_LEN] =
+    "/var/log/sysak/imc_latency/imc_latency.log";
+char* log_path = 0;
+FILE* log_fp = 0;
+bool exiting = false;
+
+static void sigint_handler(int signo) { exiting = 1; }
+
+/* if out of range or no number found return nonzero */
+static int parse_long(const char* str, long* retval) {
+    int err = 0;
+    char* endptr;
+    errno = 0;
+    long val = strtol(str, &endptr, 10);
+
+    /* Check for various possible errors */
+    if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) ||
+        (errno != 0 && val == 0)) {
+        fprintf(stderr, "Failed parse val.\n");
+        err = errno;
+        return err;
+    }
+
+    if (endptr == str) return err = -1;
+    *retval = val;
+    return err;
+}
+
+static error_t parse_arg(int key, char* arg, struct argp_state* state) {
+    int err = 0;
+    long val;
+    switch (key) {
+        case 'h':
+            argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+            break;
+        case 'd':
+            err = parse_long(arg, &val);
+            if (err || val <= 0) {
+                fprintf(stderr, "Failed parse delay.\n");
+                argp_usage(state);
+            }
+
+            env.delay = val;
+            break;
+        case 'i':
+            err = parse_long(arg, &val);
+            if (err || val <= 0) {
+                fprintf(stderr, "Failed parse iteration-num.\n");
+                argp_usage(state);
+            }
+            env.nr_iter = val;
+            env.nr_iter++;
+            break;
+        case 'f':
+            log_path = arg;
+            break;
+        case ARGP_KEY_ARG:
+            break;
+        default:
+            return ARGP_ERR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+static int prepare_directory(char* path) {
+    int ret;
+
+    ret = mkdir(path, 0777);
+    if (ret < 0 && errno != EEXIST)
+        return errno;
+    else
+        return 0;
+}
+
+static FILE* open_logfile() {
+    FILE* f = 0;
+    if (!log_path) {
+        log_path = default_log_path;
+    }
+
+    f = fopen(log_path, "w");
+
+    return f;
+}
 
 int64_t read_sys_file(char* path, bool slient) {
     int64_t val;
@@ -573,13 +653,14 @@ static int init_env() {
 
     // init data
     init_data();
-
+#ifdef DEBUG
     fprintf(stderr, "nr_socket=%d nr_core=%d nr_cpu=%d nr_channel=%d \n",
             env.nr_socket, env.nr_core, env.nr_cpu, env.nr_channel);
     int i = 0;
     for (i = 0; i < env.nr_socket; i++) {
         fprintf(stderr, "socket%d-ref cpu=%d\n", i, env.socket_ref_core[i]);
     }
+#endif
 
 cleanup:
 
@@ -710,38 +791,77 @@ void print_record(record* rec) {
 }
 #endif
 
-static int collect_data() {
-    int32_t socket_id = 0, channel_id = 0, line_num = 0;
-    read_imc();
+static char* ts2str(time_t ts, char* buf, int size) {
+    struct tm* t = gmtime(&ts);
+    strftime(buf, size, "%Y-%m-%d %H:%M:%S", t);
+    return buf;
+}
+
+static void output_ts(FILE* dest) {
+    char stime_str[BUF_SIZE] = {0};
+    time_t now = time(0);
+    fprintf(dest, "[TIME-STAMP] %s\n", ts2str(now, stime_str, BUF_SIZE));
+}
+
+static void output_socket_lat(FILE* dest) {
+    int32_t socket_id = 0;
 
-    fprintf(stderr, "[SOCKET_LEVEL]\n");
-    fprintf(stderr, "%16s %16s %16s\n", "socket", "rlat", "wlat");
+    fprintf(dest, "%s\n", "[SOCKET_LEVEL]");
+    // fprintf(dest, "%8s%16s%16s\n", "socket", "rlat", "wlat");
+    fprintf(dest, "%8s", "");
 
     for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
-        char socket_name[32];
-        snprintf(socket_name, 32, "%d", socket_id);
+        fprintf(dest, "%8d", socket_id);
+    }
+    fprintf(dest, "\n");
+
+    fprintf(dest, "%8s", "rlat");
+    for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
         socket_record* srec = &after.socket_record_arr[socket_id];
-        fprintf(stderr, "%16s %16lf %16lf\n", socket_name, srec->read_latency,
-                srec->write_latency);
+        fprintf(dest, "%8.2lf", srec->read_latency);
     }
+    fprintf(dest, "\n");
 
+    fprintf(dest, "%8s", "wlat");
+    for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
+        socket_record* srec = &after.socket_record_arr[socket_id];
+        fprintf(dest, "%8.2lf", srec->write_latency);
+    }
+    fprintf(dest, "\n");
+}
+
+static void output_channel_lat(FILE* dest) {
+    int32_t socket_id = 0, channel_id = 0;
     for (socket_id = 0; socket_id < env.nr_socket; socket_id++) {
-        fprintf(stderr, "[CHANNEL_LEVEL-SOCKET%d]\n", socket_id);
         char socket_name[32];
         snprintf(socket_name, 32, "%d", socket_id);
 
         socket_record* srec = &after.socket_record_arr[socket_id];
 
-        fprintf(stderr, "%16s %16s %16s\n", "channel", "rlat", "wlat");
+        fprintf(dest, "[CHANNEL_LEVEL]-[SOCKET-%d]\n", socket_id);
+        fprintf(dest, "%8s", "");
+        for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
+            fprintf(dest, "%8d", channel_id);
+        }
+        fprintf(dest, "\n");
+
+        fprintf(dest, "%8s", "rlat");
+        for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
+            channel_record* crec = &srec->channel_record_arr[channel_id];
+            fprintf(dest, "%8.2lf", crec->read_latency);
+        }
+        fprintf(dest, "\n");
+
+        fprintf(dest, "%8s", "wlat");
         for (channel_id = 0; channel_id < env.nr_channel; channel_id++) {
             channel_record* crec = &srec->channel_record_arr[channel_id];
-            char channel_name[32];
-            snprintf(channel_name, 32, "%d", channel_id);
-            fprintf(stderr, "%16s %16s %16s\n", channel_name,
-                    crec->read_latency, crec->write_latency);
+            fprintf(dest, "%8.2lf", crec->write_latency);
         }
+        fprintf(dest, "\n");
     }
+}
 
+void swap_record() {
     /* swap data */
     socket_record* tmp = before.socket_record_arr;
     before.socket_record_arr = after.socket_record_arr;
@@ -753,17 +873,58 @@ static int collect_data() {
 
     /* reset before timestamp */
     before_ts = after_ts;
+}
 
-    return 0;
+static void output_split(FILE* dest) { fprintf(dest, "\n"); }
+static void collect_data() {
+    int32_t socket_id = 0, channel_id = 0, line_num = 0;
+    read_imc();
+
+    if (before_ts) {
+        output_ts(log_fp);
+        output_socket_lat(log_fp);
+        output_channel_lat(log_fp);
+        output_split(log_fp);
+        fflush(log_fp);
+    }
+
+    swap_record();
 }
 
-static clean_env(void) { free_data(); }
+static void clean_env(void) { free_data(); }
+
+int main(int argc, char** argv) {
+    int err;
+    /* parse args */
+    static const struct argp argp = {
+        .options = opts,
+        .parser = parse_arg,
+        .doc = argp_program_doc,
+    };
+
+    err = argp_parse(&argp, argc, argv, 0, 0, 0);
+    if (err) {
+        fprintf(stderr, "Failed parse args.\n");
+        return -1;
+    }
+
+    prepare_directory(log_dir);
+    log_fp = open_logfile();
+    if (!log_fp) {
+        fprintf(stderr, "Failed open log file.\n");
+        return -1;
+    }
+
+    if (signal(SIGINT, sigint_handler) == SIG_ERR) {
+        fprintf(stderr, "Failed set signal handler.\n");
+        return -errno;
+    }
 
-int main() {
     init_env();
-    while (1) {
-        sleep(1);
+
+    while (env.nr_iter-- && !exiting) {
         collect_data();
+        sleep(env.delay);
     }
 
     clean_env();
diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h
index 6c8ccde5..1d9fddda 100644
--- a/source/tools/detect/mem/imc_latency/imc_latency.h
+++ b/source/tools/detect/mem/imc_latency/imc_latency.h
@@ -92,6 +92,8 @@ enum INTEL_CPU_MODEL {
 #define MAX_IMC_ID 100
 #define GENERAL_REG_NUM 4
 #define FIXED_REG_NUM 1
+#define FILE_PATH_LEN 256
+#define DEFAUlT_PEROID 3
 
 typedef struct imc_event_t {
     struct perf_event_attr attr;
@@ -119,4 +121,36 @@ struct topology_ent {
     int64_t socket_id;
 };
 
+typedef struct event {
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    uint64_t dram_speed;
+} event;
+
+typedef struct channel_record {
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    double read_latency;
+    double write_latency;
+} channel_record;
+
+typedef struct socket_record {
+    channel_record* channel_record_arr;
+    uint64_t rpq_occ;
+    uint64_t rpq_ins;
+    uint64_t wpq_occ;
+    uint64_t wpq_ins;
+    double read_latency;
+    double write_latency;
+    uint64_t dram_clock;
+} socket_record;
+
+typedef struct record {
+    socket_record* socket_record_arr;
+} record;
+
 #endif  // UNITY_SAMPLE_H
-- 
Gitee


From 4ab6f360ca1bf0ab0ec54b5783b80069ece36d5a Mon Sep 17 00:00:00 2001
From: = <wodemia@linux.alibaba.com>
Date: Mon, 10 Jul 2023 15:25:59 +0800
Subject: [PATCH 09/16] process unsupported model

---
 .../detect/mem/imc_latency/imc_latency.c      | 37 +++----------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c
index 204d1343..e6f9340c 100644
--- a/source/tools/detect/mem/imc_latency/imc_latency.c
+++ b/source/tools/detect/mem/imc_latency/imc_latency.c
@@ -762,35 +762,6 @@ void read_imc() {
     }
 }
 
-#ifdef DEBUG
-void print_socket(socket_record* rec) {
-    fprintf(stderr,
-            "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld dram_clocks=%ld "
-            "r_latency=%lf w_latency=%lf\n",
-            rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins,
-            rec->dram_clock, rec->read_latency, rec->write_latency);
-}
-
-void print_channel(channel_record* rec) {
-    fprintf(stderr,
-            "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld r_latency = % lf "
-            "w_latency = % lf\n ",
-            rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins,
-            rec->read_latency, rec->write_latency);
-}
-
-void print_record(record* rec) {
-    int i = 0;
-    int j = 0;
-    for (i = 0; i < env.nr_socket; i++) {
-        print_socket(&rec->socket_record_arr[i]);
-        for (j = 0; j < env.nr_channel; j++) {
-            print_channel(&rec->socket_record_arr[i].channel_record_arr[j]);
-        }
-    }
-}
-#endif
-
 static char* ts2str(time_t ts, char* buf, int size) {
     struct tm* t = gmtime(&ts);
     strftime(buf, size, "%Y-%m-%d %H:%M:%S", t);
@@ -920,8 +891,12 @@ int main(int argc, char** argv) {
         return -errno;
     }
 
-    init_env();
-
+    err = init_env();
+    if (err) {
+        fprintf(stderr, "Init env error.\n");
+        return -1;
+    }
+    
     while (env.nr_iter-- && !exiting) {
         collect_data();
         sleep(env.delay);
-- 
Gitee


From c2ca38fc4e129bf9f2e4a6932a5c52859a0ca6a2 Mon Sep 17 00:00:00 2001
From: = <wodemia@linux.alibaba.com>
Date: Mon, 10 Jul 2023 17:45:44 +0800
Subject: [PATCH 10/16] imc_latency: fix readme.md

---
 source/tools/detect/mem/imc_latency/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md
index fd669fb1..90c46acd 100644
--- a/source/tools/detect/mem/imc_latency/README.md
+++ b/source/tools/detect/mem/imc_latency/README.md
@@ -10,7 +10,7 @@
 Sample:
 
 imc_latency -f /dev/stdout #输出日子到控制台
-imc_latency -d 15 i 20  # 每15秒采集一次 输出20次采集结果
+imc_latency -d 15 -i 20  # 每15秒采集一次 输出20次采集结果
 ```
 
 ### 结果说明
@@ -38,7 +38,7 @@ imc_latency -d 15 i 20  # 每15秒采集一次 输出20次采集结果
 
 ## 原理与限制
 
-基于IMC的PMU组件实现，需要硬件支持。目前仅支持Intel的Ice Lake（ICX）、Sky Lake（SKX）、Cascade Lake以及Sapphire Rapids(SPR)等架构。
+基于IMC的PMU组件实现，需要硬件支持。目前仅支持Intel的Ice Lake（ICX）、Sky Lake（SKX）、Cascade Lake以及Sapphire Rapids(SPR)等微架构。
 
 | 微架构          | 代号 | cpu-model编号 |
 | --------------- | ---- | ------------- |
@@ -49,7 +49,7 @@ imc_latency -d 15 i 20  # 每15秒采集一次 输出20次采集结果
 
 ### 检查是否支持
 
-可以通过`lscpu`的`Model`字段检查硬件是否支持。
+可以使用`lscpu`命令，通过CPU的`Model`字段查看微架构类型，检查硬件是否支持。
 
 ```bash
 Architecture:        x86_64
-- 
Gitee


From 7c82e5cb8027d3fa67cb10dec9ebb3ec88de35ea Mon Sep 17 00:00:00 2001
From: = <wodemia@linux.alibaba.com>
Date: Tue, 11 Jul 2023 16:32:36 +0800
Subject: [PATCH 11/16] imc_latency: rename macro

---
 source/tools/detect/mem/imc_latency/imc_latency.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h
index 1d9fddda..d2b6387e 100644
--- a/source/tools/detect/mem/imc_latency/imc_latency.h
+++ b/source/tools/detect/mem/imc_latency/imc_latency.h
@@ -1,5 +1,5 @@
-#ifndef UNITY_SAMPLE_H
-#define UNITY_SAMPLE_H
+#ifndef IMC_LATENCY_H
+#define IMC_LATENCY_H
 
 #include <linux/types.h>
 #include <stdbool.h>
@@ -153,4 +153,4 @@ typedef struct record {
     socket_record* socket_record_arr;
 } record;
 
-#endif  // UNITY_SAMPLE_H
+#endif
-- 
Gitee


From 1e27b878cd5e55b4581aaea2f82b4401dfbdf11b Mon Sep 17 00:00:00 2001
From: jietaoxiao <jietaoxiao@linux.alibaba.com>
Date: Thu, 13 Jul 2023 09:37:03 +0800
Subject: [PATCH 12/16] memcgoffline: add offline memcg detect tool to
 /tools/detect/mem

---
 source/lib/uapi/Makefile                      |   3 +-
 source/lib/uapi/include/kcore_utils.h         |  70 +++++
 source/lib/uapi/kcore_utils.c                 | 295 ++++++++++++++++++
 source/tools/detect/mem/memcgoffline/Makefile |   7 +
 .../mem/memcgoffline/include/btfparse.h       |  39 +++
 .../mem/memcgoffline/include/memcg_iter.h     |  36 +++
 .../detect/mem/memcgoffline/memcg_iter.c      | 291 +++++++++++++++++
 .../detect/mem/memcgoffline/memcgoffline.c    | 187 +++++++++++
 8 files changed, 927 insertions(+), 1 deletion(-)
 create mode 100644 source/lib/uapi/include/kcore_utils.h
 create mode 100644 source/lib/uapi/kcore_utils.c
 create mode 100644 source/tools/detect/mem/memcgoffline/Makefile
 create mode 100644 source/tools/detect/mem/memcgoffline/include/btfparse.h
 create mode 100644 source/tools/detect/mem/memcgoffline/include/memcg_iter.h
 create mode 100644 source/tools/detect/mem/memcgoffline/memcg_iter.c
 create mode 100644 source/tools/detect/mem/memcgoffline/memcgoffline.c

diff --git a/source/lib/uapi/Makefile b/source/lib/uapi/Makefile
index b92c5bdc..d267d7a7 100644
--- a/source/lib/uapi/Makefile
+++ b/source/lib/uapi/Makefile
@@ -1,6 +1,7 @@
 SOURCE := $(shell find . -name "*.c")
 OBJS :=$(patsubst %.c,%.o,$(SOURCE))
 STATIC_OBJS := $(addprefix $(OBJPATH)/,$(OBJS))
+LIBS += -L /usr/lib64 -l:libelf.a
 
 libsysak: $(OBJ_LIB_PATH)/libsysak.a
 
@@ -10,6 +11,6 @@ $(OBJ_LIB_PATH)/libsysak.a: $(STATIC_OBJS)
 $(STATIC_OBJS): $(OBJS)
 
 $(OBJS): %.o : %.c
-	gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include
+	gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include $(LIBS)
 
 
diff --git a/source/lib/uapi/include/kcore_utils.h b/source/lib/uapi/include/kcore_utils.h
new file mode 100644
index 00000000..a877ed9c
--- /dev/null
+++ b/source/lib/uapi/include/kcore_utils.h
@@ -0,0 +1,70 @@
+#ifndef __KCORE_UTILS_H
+#define __KCORE_UTILS_H	/* must spell the same name as the #ifndef, or the guard is inert */
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <elf.h>
+
+#define BUFF_MAX		4096
+#define MAX_KCORE_ELF_HEADER_SIZE   32768
+
+#ifdef DEBUG
+#define LOG_DEBUG(...)	fprintf(stderr, __VA_ARGS__)
+#else
+#define LOG_DEBUG(...)	do { } while (0)
+#endif /* DEBUG */
+
+#define LOG_INFO(...)	fprintf(stdout, __VA_ARGS__)
+#define LOG_WARN(...)	fprintf(stderr, __VA_ARGS__)
+#define LOG_ERROR(...)	fprintf(stderr, __VA_ARGS__)
+
+#define MIN(a,b)       (((a)<(b))?(a):(b))
+#define MAX(a,b)       (((a)>(b))?(a):(b))
+
+/* struct to record the kcore elf file data*/
+struct proc_kcore_data {
+	unsigned int flags;
+	unsigned int segments;
+	char *elf_header;
+	size_t header_size;
+	Elf64_Phdr *load64;
+	Elf64_Phdr *notes64;
+	Elf32_Phdr *load32;
+	Elf32_Phdr *notes32;
+	void *vmcoreinfo;
+	unsigned int size_vmcoreinfo;
+};
+
+
+/**
+ * lookup_kernel_symbol - look up kernel symbol address from /proc/kallsyms
+ * 
+ * @symbol_name: kernel symbol name to look up.
+ * @return: the address of the kernel symbol. 
+ * 
+ */
+uintptr_t lookup_kernel_symbol(const char *symbol_name);
+
+/* prepare_btf_file - check exist btf file, if not exist, download it */
+char *prepare_btf_file();
+
+/* open /proc/kcore and read necessary data to interpret kcore */
+int kcore_init();
+
+/* close /proc/kcore and do some cleanup */
+void kcore_uninit();
+
+/**
+ * kcore_readmem - read data at a given kernel virtual address from kcore
+ *
+ * @kvaddr: kernel address to read.
+ * @buf: buffer that receives the data.
+ * @size: number of bytes to read.
+ * @return: number of bytes read on success, -1 on failure.
+ *
+ * Note: must be called after kcore_init()
+ */
+ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size);
+
+
+#endif
\ No newline at end of file
diff --git a/source/lib/uapi/kcore_utils.c b/source/lib/uapi/kcore_utils.c
new file mode 100644
index 00000000..e58b45c2
--- /dev/null
+++ b/source/lib/uapi/kcore_utils.c
@@ -0,0 +1,295 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <fcntl.h>
+
+#include "kcore_utils.h"
+
+#define LEN             (128)
+
+static struct proc_kcore_data proc_kcore_data = { 0 };
+static struct proc_kcore_data *pkd = &proc_kcore_data;
+
+static int kcore_fd = 0;
+
+/*
+ * Look up symbol_name in /proc/kallsyms; returns its address or -1UL.
+ */
+uintptr_t lookup_kernel_symbol(const char *symbol_name)
+{
+	const char *kallsyms_file = "/proc/kallsyms";
+	FILE *fp;
+	char line[BUFF_MAX];
+	char *pos;
+	uintptr_t addr = -1UL;	/* sentinel: symbol not found */
+
+	fp = fopen(kallsyms_file, "r");
+	if (fp == NULL) {
+		perror("fopen: /proc/kallsyms");
+		return -1;
+	}
+
+	while (fgets(line, BUFF_MAX, fp)) {
+		if ((pos = strstr(line, symbol_name)) == NULL)
+			continue;
+
+		/* Remove trailing newline */
+		line[strcspn(line, "\n")] = '\0';
+
+		/* Exact match: preceded by whitespace and running to end of line */
+		if (pos == line || !isspace(*(pos - 1)))
+			continue;
+		if (!strcmp(pos, symbol_name)) {
+			addr = strtoul(line, NULL, 16);	/* parsed from the start of the line */
+			break;
+		}
+	}
+
+	if (addr == -1UL)
+		LOG_ERROR("failed to lookup symbol: %s\n", symbol_name);
+
+	fclose(fp);
+	return addr;
+}
+
+static int kcore_elf_init()
+{
+	/* Cache the ELF header and program headers of /proc/kcore in pkd. */
+	Elf64_Ehdr *elf64;
+	Elf64_Phdr *load64;
+	Elf64_Phdr *notes64;
+	char eheader[MAX_KCORE_ELF_HEADER_SIZE];
+	size_t load_size, notes_size;
+
+	if (read(kcore_fd, eheader, MAX_KCORE_ELF_HEADER_SIZE) !=
+			MAX_KCORE_ELF_HEADER_SIZE) {
+		perror("read: /proc/kcore ELF header");
+		return -1;
+	}
+
+	/* The first program header is taken as the note, the rest as loads. */
+	elf64 = (Elf64_Ehdr *)&eheader[0];
+	notes64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr)];
+	load64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr) +
+					sizeof(Elf64_Phdr)];
+	pkd->segments = elf64->e_phnum - 1;
+
+	notes_size = load_size = 0;
+	if (notes64->p_type == PT_NOTE)
+		notes_size = notes64->p_offset + notes64->p_filesz;
+	if (notes64->p_type == PT_LOAD)
+		load_size = (unsigned long)(load64+(elf64->e_phnum)) -
+				(unsigned long)elf64;
+
+	/* Clamp to what was read into eheader[], else memcpy() overruns it. */
+	pkd->header_size = MAX(notes_size, load_size);
+	if (!pkd->header_size || pkd->header_size > MAX_KCORE_ELF_HEADER_SIZE)
+		pkd->header_size = MAX_KCORE_ELF_HEADER_SIZE;
+
+	if ((pkd->elf_header = (char *)malloc(pkd->header_size)) == NULL) {
+		perror("malloc: /proc/kcore ELF header");
+		return -1;
+	}
+	memcpy(&pkd->elf_header[0], &eheader[0], pkd->header_size);
+	pkd->notes64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr)];
+	pkd->load64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr) +
+						     sizeof(Elf64_Phdr)];
+	return 0;
+}
+
+int kcore_init()
+{
+	/* Open /proc/kcore and cache its ELF headers; 0 on success, -1 on error. */
+	kcore_fd = open("/proc/kcore", O_RDONLY);
+	if (kcore_fd < 0) {
+		perror("open: /proc/kcore");
+		return -1;
+	}
+	if (kcore_elf_init() == 0)
+		return 0;
+
+	/* Mark the fd invalid so kcore_uninit() cannot close it a second time. */
+	close(kcore_fd);
+	kcore_fd = -1;
+	return -1;
+}
+
+void kcore_uninit(void)
+{
+	free(pkd->elf_header);		/* free(NULL) is a no-op */
+	pkd->elf_header = NULL;		/* a repeated call must not double-free */
+	if (kcore_fd > 0)
+		close(kcore_fd);
+}
+
+/*
+ * Read size bytes at kvaddr through /proc/kcore; returns -1 on any failure.
+ * Invalid addresses (seen on some kernels like 4.9, known bugs) are skipped.
+ */
+ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size)
+{
+	Elf64_Phdr *lp64;
+	unsigned long offset = -1UL;
+	ssize_t read_size;
+	unsigned int i;
+
+	for (i = 0; i < pkd->segments; i++) {
+		lp64 = pkd->load64 + i;
+		if ((kvaddr >= lp64->p_vaddr) &&
+			(kvaddr < (lp64->p_vaddr + lp64->p_memsz))) {
+			offset = (off_t)(kvaddr - lp64->p_vaddr) +
+					(off_t)lp64->p_offset;
+			break;
+		}
+	}
+	if (i == pkd->segments) {
+		for (i = 0; i < pkd->segments; i++) {
+			lp64 = pkd->load64 + i;
+			LOG_DEBUG("%2d: [0x%lx, 0x%lx)\n", i, lp64->p_vaddr,
+					lp64->p_vaddr + lp64->p_memsz);
+		}
+		/* kvaddr is not covered by any cached segment */
+		goto failed;
+	}
+
+	if (lseek(kcore_fd, offset, SEEK_SET) < 0) {
+		perror("lseek: /proc/kcore");
+		goto failed;
+	}
+
+	read_size = read(kcore_fd, buf, size);
+	if (read_size < size) {	/* a short read is treated as a failure too */
+		perror("read: /proc/kcore");
+		goto failed;
+	}
+
+	return read_size;
+
+failed:
+	return -1;
+}
+
+/*
+ * Remove every blank, tab and newline from str.
+ *
+ * Compacts in place: the result is never longer than the input, so no
+ * scratch buffer (and no room-for-NUL arithmetic) is needed.
+ */
+static void stripWhiteSpace(char *str)
+{
+    char *dst = str;    /* write cursor, never ahead of the read cursor */
+
+    for (; *str != '\0'; str++) {
+        if (*str == ' ' || *str == '\t' || *str == '\n')
+            continue;
+        *dst++ = *str;
+    }
+
+    *dst = '\0';
+}
+
+/* Run cmd via popen() and copy its first output line, whitespace removed, into result[len]. */
+static int do_cmd(const char *cmd, char *result, int len)
+{
+    FILE *res;
+    char region[LEN] = {0};
+
+    res = popen(cmd, "r");
+    if (res == NULL) {
+        printf("get region id failed\n");
+        return -1;
+    }
+
+    if (feof(res)) {
+        printf("cmd line end\n");
+        pclose(res);            /* do not leak the pipe on the early exit */
+        return 0;
+    }
+    if (fgets(region, sizeof(region)-1, res) == NULL)
+        region[0] = '\0';       /* the command printed nothing */
+    stripWhiteSpace(region);
+    strncpy(result, region, len - 1);
+    result[len - 1] = '\0';
+    pclose(res);
+    return 0;
+}
+
+/* Fetch vmlinux-<kernel> with wget into /boot or $SYSAK_WORK_PATH/tools/<kernel>; always returns 0. */
+static int download_btf()
+{
+    char region[LEN] = {0};
+    char arch[LEN] = {0};
+    char kernel[LEN] = {0};
+    char dw[LEN+LEN+LEN] = {0};     /* same size as the bound given to snprintf() below */
+    char timeout[LEN] = "-internal";
+    char sysak_path[LEN] = "/boot";
+    char *curl_cmd = "curl -s --connect-timeout 2 http://100.100.100.200/latest/meta-data/region-id 2>&1";
+    char *arch_cmd = "uname -m";
+    char *kernel_cmd = "uname -r";
+    char *tmp;
+
+    /* Without a "cn-" region, use cn-hangzhou and drop the "-internal" host suffix. */
+    do_cmd(curl_cmd, region, LEN);
+    if (!strstr(region,"cn-")) {
+        strcpy(region, "cn-hangzhou");
+        memset(timeout, 0, sizeof(timeout));
+    }
+
+    do_cmd(arch_cmd, arch, LEN);
+    do_cmd(kernel_cmd, kernel, LEN);
+
+    if((tmp = getenv("SYSAK_WORK_PATH")) != NULL)
+    {
+        /* bounded: the environment value may be longer than sysak_path[] */
+        snprintf(sysak_path, sizeof(sysak_path), "%s/tools/%s", tmp, kernel);
+    }
+
+    /* NOTE(review): region and SYSAK_WORK_PATH reach the shell unescaped. */
+    snprintf(dw, sizeof(dw), "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s", sysak_path, kernel, &region[3],&region[3], timeout,arch, kernel);
+
+    do_cmd(dw, kernel, LEN);
+    return 0;
+}
+
+/* Return 0 when btf names an existing file of at least 10 KiB, otherwise -1. */
+static int check_btf_file(char *btf)
+{
+    struct stat info;
+
+    if (stat(btf, &info) != 0)
+        return -1;          /* stat() failed */
+
+    if (info.st_size >= 10*1024)
+        return 0;
+
+    return -1;              /* too small */
+}
+
+/* Locate (or download) the BTF file of the running kernel; NULL if unavailable. */
+char *prepare_btf_file()
+{
+    static char btf[LEN] = {0};     /* static: returned to the caller */
+    char ver[LEN] = {0};
+    char *work_path = getenv("SYSAK_WORK_PATH");
+
+    do_cmd("uname -r", ver, LEN);
+
+    /* snprintf: the environment value must not overflow btf[] */
+    if (work_path != NULL)
+        snprintf(btf, sizeof(btf), "%s/tools/%s/vmlinux-%s", work_path, ver, ver);
+    else
+        snprintf(btf, sizeof(btf), "/boot/vmlinux-%s", ver);
+
+    if (check_btf_file(btf))
+        download_btf();             /* best effort; re-checked below */
+
+    if (check_btf_file(btf)) {
+        LOG_ERROR("btf file:%s not found \n", btf);
+        return NULL;
+    }
+    return btf;
+}
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/Makefile b/source/tools/detect/mem/memcgoffline/Makefile
new file mode 100644
index 00000000..de3835b5
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/Makefile
@@ -0,0 +1,7 @@
+target := memcgoffline
+LIBS += -L ${OBJ_LIB_PATH}/lib -l:libcoolbpf.a -l:libsysak.a -lelf -lz
+INCLUDES += -I$(SRC)/tools/detect/mem/memcgoffline/include
+LDFLAGS += -Wall $(LIBS)
+mods := memcg_iter.o memcgoffline.o
+
+include $(SRC)/mk/csrc.mk
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/include/btfparse.h b/source/tools/detect/mem/memcgoffline/include/btfparse.h
new file mode 100644
index 00000000..84204f54
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/include/btfparse.h
@@ -0,0 +1,39 @@
+
+
+#ifndef __BTF_PARSE_H
+#define __BTF_PARSE_H
+
+
+
+/**
+ * btf_load: load btf from btf_custom_path
+ * 
+ * @btf_custom_path: path of btf file
+ */
+struct btf *btf_load(char *btf_custom_path);
+typedef unsigned int uint32_t;
+
+struct member_attribute
+{
+    uint32_t size;      // size of the structure's member
+    uint32_t real_size; // NOTE(review): meaning not documented here; confirm against btf_find_struct_member()
+    uint32_t offset;    // offset of the member in the structure (presumably in bits; TODO confirm)
+};
+
+/**
+ * btf_find_struct_member - look up a member of a structure in BTF
+ *
+ * @btf: BTF handle, e.g. as returned by btf_load()
+ * @struct_name: name of the struct
+ * @member_name: name of the structure's member
+ * @return: NULL means error; get the error number from errno.
+ *
+ * Note: Remember to free the returned struct member_attribute pointer
+ */
+struct member_attribute *btf_find_struct_member(struct btf *btf, char *struct_name, char *member_name);
+
+int btf_get_member_offset(struct btf *btf, char *name, char *member_name);
+void btf__free(struct btf *btf);
+
+#endif
+
diff --git a/source/tools/detect/mem/memcgoffline/include/memcg_iter.h b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h
new file mode 100644
index 00000000..300a82b0
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h
@@ -0,0 +1,36 @@
+#ifndef __MEMCG_ITER_H_
+#define __MEMCG_ITER_H_
+
+#include "btfparse.h"
+
+#define PATH_MAX        (2048)
+#define LEN             (255)
+#define CSS_DYING       (1 << 4)     /* css is dying */
+
+/* iterator function of "for_each_mem_cgroup" */
+unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev,
+                struct btf* handle);
+
+/* find out and set root_mem_cgroup from kallsyms*/
+int memcg_iter_init();
+
+/* Iter all memory cgroups, must call after memcg_iter_init() */
+#define for_each_mem_cgroup(iter, start, btf)           \
+    for (iter = _mem_cgroup_iter(start, (unsigned long)NULL, btf);  \
+         iter != (unsigned long)NULL;              \
+         iter = _mem_cgroup_iter(start, iter, btf))
+
+/* 
+ * get member offset of certain struct, need to read from btf file,
+ * (don't call it in loop which may cause huge overhead)
+ */
+struct member_attribute *get_offset_no_cache(char *struct_name, 
+                            char *member_name, struct btf *handle);
+
+int get_member_offset(char *struct_name, char *member_name, 
+        struct btf *handle);
+
+void memcg_get_name(unsigned long memcg, char *name,
+                int len, struct btf *btf_handle);
+
+#endif
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/memcg_iter.c b/source/tools/detect/mem/memcgoffline/memcg_iter.c
new file mode 100644
index 00000000..541d0367
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/memcg_iter.c
@@ -0,0 +1,291 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "memcg_iter.h"
+#include "kcore_utils.h"
+
+static unsigned long root_mem_cgroup;
+
+/*
+ * BTF lookup of struct_name.member_name; the returned record has its offset
+ * divided by 8 (presumably bits to bytes). Returns NULL if the lookup fails.
+ */
+struct member_attribute *get_offset_no_cache(char *struct_name,
+                            char *member_name, struct btf *handle)
+{
+    struct member_attribute *found =
+        btf_find_struct_member(handle, struct_name, member_name);
+
+    if (found != NULL)
+        found->offset /= 8;
+    return found;
+}
+
+int get_member_offset(char *struct_name, char *member_name, struct btf *handle)
+{
+    char prefix[LEN];   /* BTF type name, e.g. "struct kernfs_node" */
+    int bits;           /* presumably a bit offset, negative on error (callers test < 0) */
+    snprintf(prefix, sizeof(prefix), "struct %s", struct_name);   /* bounded, unlike strcat() */
+    bits = btf_get_member_offset(handle, prefix, member_name);
+    return bits < 0 ? bits : bits / 8;   /* keep errors negative: -1 / 8 would be 0 */
+}
+
+/* Return the child css after pos under parent (the first one when pos is 0), or 0. */
+static unsigned long _css_next_child(unsigned long pos, unsigned long parent,
+                        struct btf *btf_handle)
+{
+    struct member_attribute *att, *att2 = NULL;
+    unsigned long next = 0, head, sibling_off;
+
+    att = get_offset_no_cache("cgroup_subsys_state", "sibling", btf_handle);
+    if (att)
+        att2 = get_offset_no_cache("cgroup_subsys_state", "children", btf_handle);
+    if (!att || !att2)
+        goto out;
+
+    sibling_off = att->offset;
+    head = parent + att2->offset;       /* address of parent->children */
+    /* First child hangs off parent->children, later ones off pos->sibling. */
+    if (kcore_readmem(pos ? pos + sibling_off : head, &next, sizeof(next)) < 0)
+        next = head;                    /* unreadable link: end the walk */
+
+    /* A link back to the list head means there are no more children. */
+    next = (next == head) ? 0 : next - sibling_off;
+
+out:
+    free(att);                          /* lookup results are heap-allocated */
+    free(att2);
+    return next;
+}
+
+/*
+ * Pre-order successor of prev in the memcg hierarchy rooted at root.
+ *
+ * @root: mem_cgroup address of the subtree root; 0 selects root_mem_cgroup.
+ * @prev: previously returned mem_cgroup; 0 starts the walk and yields root.
+ * @btf_handle: BTF handle used to resolve the structure offsets.
+ *
+ * Returns the next mem_cgroup address, or 0 when the walk is finished or
+ * a kernel pointer cannot be read. The offset lookups are heap-allocated,
+ * so they are released on every exit path.
+ */
+unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev,
+                struct btf *btf_handle)
+{
+    struct member_attribute *att, *att2 = NULL;
+    unsigned long css, root_css, parent;
+    unsigned long next = 0;
+
+    if(!root)
+        root = root_mem_cgroup;
+    if(!prev)
+        return root;
+
+    att = get_offset_no_cache("mem_cgroup", "css", btf_handle);
+    if (att)
+        att2 = get_offset_no_cache("cgroup_subsys_state", "parent", btf_handle);
+    if (!att || !att2)
+        goto out;
+
+    /* css is embedded in mem_cgroup, so its address is a plain offset. */
+    css = prev + att->offset;
+    root_css = root + att->offset;
+
+    /* Descend to the first child if there is one ... */
+    next = _css_next_child(0, css, btf_handle);
+
+    /* ... otherwise climb towards the root until a next sibling exists. */
+    while (!next && css != root_css) {
+        if (kcore_readmem(css + att2->offset, &parent, sizeof(parent)) < 0)
+            break;              /* unreadable parent pointer: stop the walk */
+        next = _css_next_child(css, parent, btf_handle);
+        css = parent;
+    }
+
+    /* Convert the css address back to its enclosing mem_cgroup. */
+    if (next)
+        next -= att->offset;
+out:
+    free(att);
+    free(att2);
+    return next;
+}
+
+int memcg_iter_init()
+{
+    unsigned long tmp;
+    ssize_t size;   /* signed: kcore_readmem() reports failure as -1 */
+
+    /* Resolve the address of root_mem_cgroup, then read the value stored there. */
+    tmp = lookup_kernel_symbol("root_mem_cgroup");
+    if (tmp == (unsigned long )-1) {
+        LOG_ERROR("unable to get root_mem_cgroup\n");
+        return -1;
+    }
+
+    size = kcore_readmem(tmp, &root_mem_cgroup, sizeof(root_mem_cgroup));
+    if (size < (ssize_t)sizeof(root_mem_cgroup)) {
+        LOG_ERROR("get incorrect address where root_mem_cgroup point to\n");
+        return -1;
+    }
+    return 0;
+}
+
+static int prepend(char **buffer, int *buflen, const char *str,
+            int namelen, int off)
+{
+    char *dst;                          /* new start of the assembled string */
+    if ((*buflen -= namelen + off) < 0)
+        return -1;                      /* out of room; *buflen stays charged */
+    dst = (*buffer -= namelen + off);
+    if (off)
+        *dst = '/';
+    memcpy(dst + off, str, namelen);
+    return 0;
+}
+
+static int cgroup_path(unsigned long cgrp, char *buf, 
+            int buflen, struct btf *btf_handle)
+{
+    int ret  = -1;  /* stays -1 if buf is too small for the path */
+    char *start;
+    unsigned long cgp;  /* parent of the cgroup currently being named */
+    char tmpname[PATH_MAX];
+    struct member_attribute *cg_pa_att, *cg_name_att;
+    struct member_attribute *cgn_name_attr;
+
+    cg_pa_att = get_offset_no_cache("cgroup", "parent", btf_handle);
+    if (!cg_pa_att)
+        return -1;
+    
+    cg_name_att = get_offset_no_cache("cgroup", "name", btf_handle);
+    if (!cg_name_att)
+        return -1;
+
+    cgn_name_attr = get_offset_no_cache("cgroup_name", "name", btf_handle);
+    if (!cgn_name_attr)
+        return -1;
+    
+
+    kcore_readmem(cgrp + cg_pa_att->offset, &cgp, sizeof(cgp));
+    if (!cgp) { /* no parent: this is reported as "/" */
+        if (strncpy(buf, "/", buflen) == NULL)
+            return -1;
+        return 0;
+    }
+
+    start = buf + buflen - 1;   /* the path is assembled from the end of buf backwards */
+    *start = '\0';
+
+    do {
+        int len;
+        unsigned long name;
+
+        kcore_readmem(cgrp + cg_name_att->offset, &name, sizeof(name));
+
+        name += cgn_name_attr->offset;
+        kcore_readmem(name, tmpname,sizeof(tmpname));   /* NOTE(review): result unchecked, tmpname may lack a NUL */
+
+        len = strlen(tmpname);
+        if ((start -= len) < buf)
+            goto out;
+
+        memcpy(start, tmpname, len);
+
+        if (--start < buf)
+            goto out;
+        
+        *start = '/';
+        cgrp = cgp; /* step up one level */
+
+        kcore_readmem(cgp + cg_pa_att->offset, &cgp, sizeof(cgp));
+
+    } while (cgp);
+
+    ret = 0;
+    memmove(buf, start, buf + buflen - start);  /* shift the finished path to the front of buf */
+out:
+    return ret;
+}
+
+/*
+ * Write the cgroupfs path of memcg ("/sys/fs/cgroup/memory/...") into
+ * name[len]; name is left empty when the cgroup cannot be located.
+ */
+void memcg_get_name(unsigned long memcg, char *name,
+                int len, struct btf *btf_handle)
+{
+    char *end;
+    int pos;
+    unsigned long cg, knname;
+    char subname[257];
+    struct member_attribute *att;
+
+    if (len > 0)
+        name[0] = '\0';     /* callers print name even if we bail out early */
+    memset(subname, 0, sizeof(subname));
+    att = get_offset_no_cache("mem_cgroup", "css", btf_handle);
+    if (!att)
+        return;
+    cg = memcg + att->offset;
+    free(att);              /* each lookup result is heap-allocated */
+
+    att = get_offset_no_cache("cgroup_subsys_state", "cgroup", btf_handle);
+    if (!att)
+        return;
+    if (kcore_readmem(cg + att->offset, &cg, sizeof(cg)) < 0)
+        cg = 0;             /* unreadable pointer: rejected by the !cg checks */
+    free(att);
+
+#ifdef LINUX_310
+    if (!cg)
+        return;
+    cgroup_path(cg, name, PATH_MAX, btf_handle);
+    end = name+strlen("/sys/fs/cgroup/memory/");
+    memmove(end, name, strlen(name)+1);
+    prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0);
+#else
+    unsigned long kn = 0, pkn;
+    int kn_name_offset, kn_pa_offset;
+
+    att = get_offset_no_cache("cgroup", "kn", btf_handle);
+    if (!att)
+        return;
+    if (cg && kcore_readmem(cg + att->offset, &kn, sizeof(kn)) < 0)
+        kn = 0;
+    free(att);
+    if (!cg || !kn)
+        return;
+
+    kn_name_offset = get_member_offset("kernfs_node", "name", btf_handle);
+    kn_pa_offset = get_member_offset("kernfs_node", "parent", btf_handle);
+    if (kn_name_offset < 0 || kn_pa_offset < 0)
+        return;
+    /* Assemble the path right to left, starting with the terminating NUL. */
+    end = name + len - 1;
+    prepend(&end, &len, "\0", 1, 0);
+    pkn = kn;
+    while (pkn) {
+        if (kcore_readmem(pkn + kn_name_offset, &knname, sizeof(knname)) < 0)
+            break;
+        if (kcore_readmem(knname, subname, sizeof(subname) - 1) < 0)   /* last byte stays NUL */
+            break;
+        pos = prepend(&end, &len, subname, strlen(subname), 0);
+        if (pos)
+            break;
+        kcore_readmem(pkn + kn_pa_offset, &kn, sizeof(kn));
+        if ((pkn == kn) || !kn)
+            break;
+        pos = prepend(&end, &len, "/", 1, 0);
+        if (pos)
+            break;
+        pkn = kn;
+    }
+    prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0);
+    memmove(name, end, strlen(end) + 1);
+#endif
+}
\ No newline at end of file
diff --git a/source/tools/detect/mem/memcgoffline/memcgoffline.c b/source/tools/detect/mem/memcgoffline/memcgoffline.c
new file mode 100644
index 00000000..16fe17db
--- /dev/null
+++ b/source/tools/detect/mem/memcgoffline/memcgoffline.c
@@ -0,0 +1,187 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+#include <sys/types.h>
+
+#include "kcore_utils.h"
+#include "memcg_iter.h"
+
+static struct btf *btf_handle = NULL;
+int total_memcg_num = 0;
+
+struct environment {
+	int print_cg_num;                 /* max offline memcg paths still to print (-n) */
+} env = {
+	.print_cg_num = 10000,
+};
+
+static int caculate_offline(unsigned long start_memcg)
+{
+    int offline_num = 0;
+    unsigned long css, css_flags, cnt, iter = 0;
+    long refcnt_value;
+    unsigned int flags_value;
+    char fileName[PATH_MAX];
+    struct member_attribute *css_attr, *css_flag_attr, *refcnt_attr;
+    struct member_attribute *cnt_attr;
+    /* Count memcgs whose css has CSS_DYING set; returns that count or -1. */
+    css_attr = get_offset_no_cache("mem_cgroup", "css", btf_handle);
+    if (!css_attr) {
+        LOG_ERROR("get css offset of mem_cgroup failed!\n");
+        return -1;
+    }
+
+    css_flag_attr = get_offset_no_cache("cgroup_subsys_state",
+                        "flags", btf_handle);
+    if (!css_flag_attr) {
+        LOG_ERROR("get flags offset of cgroup_subsys_state failed!\n");
+        return -1;
+    }
+
+    refcnt_attr = get_offset_no_cache("cgroup_subsys_state",
+                    "refcnt", btf_handle);
+    if (!refcnt_attr) {
+        LOG_ERROR("get refcnt offset of cgroup_subsys_state failed!\n");
+        return -1;
+    }
+
+    cnt_attr = get_offset_no_cache("percpu_ref", "count", btf_handle);
+    if (!cnt_attr) {
+        LOG_ERROR("get cnt offset of percpu_ref failed!\n");
+        return -1;
+    }
+    /* Also bumps the global total_memcg_num for every memcg visited. */
+    for_each_mem_cgroup(iter, start_memcg, btf_handle) {
+        css = iter + css_attr->offset;
+        css_flags = css + css_flag_attr->offset;
+
+        if (kcore_readmem(css_flags, &flags_value, sizeof(flags_value)) < 0)
+            flags_value = 0;    /* unreadable flags: do not guess "dying" */
+        if (flags_value & CSS_DYING) {
+            cnt = css + refcnt_attr->offset + cnt_attr->offset;
+            fileName[0] = '\0'; /* memcg_get_name() may return without writing */
+            offline_num++;
+            if (kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)) < 0)
+                refcnt_value = 0;
+            if (env.print_cg_num > 0) {
+                memcg_get_name(iter, fileName, PATH_MAX, btf_handle);
+                printf("cgroup path:%s\trefcount=%ld\n", fileName, refcnt_value);
+                env.print_cg_num--;
+            }
+        }
+        total_memcg_num++;
+    }
+
+    return offline_num;
+}
+
+static void show_usage(char *prog)
+{
+	const char *str =	/* used as a format string: the single %s receives prog */
+	"   Usage: %s [OPTIONS]\n"
+	"   Options:\n"
+	"   -n PRINT_MAX_CG_NUM   Max offline memcg paths to printf(default 10000)\n"
+    "   -h HELP               help\n"
+    "   \n"
+
+    "   EXAMPLE:\n "
+    "   memcgoffline        # display number of offline memcg and all their paths.\n"
+    "   memcgoffline -n 10  # display number of offline memcg and "
+    "10 of offline memcg paths.\n"
+	;
+
+	fprintf(stderr, str, prog);
+	exit(EXIT_FAILURE);	/* never returns, including for -h */
+}
+
+static int parse_args(int argc, char **argv, struct environment *env)
+{
+	int c, option_index;
+	char *prog_name = "memcgoffline";
+
+	for (;;) {
+		c = getopt_long(argc, argv, "n:h", NULL, &option_index);
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'n':
+			errno = 0;	/* strtol() only sets errno on failure */
+			env->print_cg_num = (int)strtol(optarg, NULL, 10);
+			if (errno)	/* was inverted: returned early on success */
+				return -errno;
+			break;
+		case 'h':
+			show_usage(prog_name);	/* would exit */
+			break;
+		default:
+			show_usage(prog_name);
+		}
+	}
+
+	return 0;
+}
+
+/* Resolve the BTF file via prepare_btf_file() and load it; NULL on failure. */
+struct btf *btf_init()
+{
+    char *path = prepare_btf_file();
+
+    /* no usable BTF file: nothing to load */
+    if (path == NULL)
+        return NULL;
+    return btf_load(path);
+}
+
+void btf_uninit(struct btf *btf)
+{
+    btf__free(btf);     /* plain call: a void function must not return an expression */
+}
+
+int main(int argc, char *argp[])
+{
+	int offline_memcg = 0, ret = 0;
+
+	ret = parse_args(argc, argp, &env);
+    if (ret) {
+        LOG_ERROR("parse arg error!\n");
+        return -1;
+    }
+
+    btf_handle = btf_init();    /* global handle used by caculate_offline() */
+    if (!btf_handle) {
+        LOG_ERROR("btf init failed!\n");
+        return -1;
+    }
+
+    ret = kcore_init();
+    if (ret) {
+        LOG_ERROR("kcore init failed!\n");
+        goto uninit_btf;    /* kcore was not set up, skip kcore_uninit() */
+    }
+
+    ret = memcg_iter_init();
+    if (ret) {
+        LOG_ERROR("memcg_iter_init failed!\n");
+        goto uninit_kcore;
+    }
+
+    offline_memcg = caculate_offline((unsigned long)NULL);  /* 0 selects root_mem_cgroup as the start */
+    if (offline_memcg < 0) {
+        LOG_ERROR("caculate offline memcg failed!\n");
+        ret = offline_memcg;
+        goto uninit_kcore;
+    }
+    printf("Offline memory cgroup num: %d\n", offline_memcg);
+    printf("Total memory cgroup num: %d\n", total_memcg_num);
+
+uninit_kcore:   /* teardown runs in reverse order of setup */
+    kcore_uninit();
+uninit_btf:
+    btf_uninit(btf_handle);
+
+    return ret;
+}
-- 
Gitee


From 9e83af508eed22c08b878cc9dbb769006633e1e7 Mon Sep 17 00:00:00 2001
From: jietaoxiao <jietaoxiao@linux.alibaba.com>
Date: Wed, 19 Jul 2023 11:16:52 +0800
Subject: [PATCH 13/16] tools/detect/mem/memcgoffline: fix make error

---
 source/mk/csrc.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/mk/csrc.mk b/source/mk/csrc.mk
index 8701dc77..e3fdf57c 100644
--- a/source/mk/csrc.mk
+++ b/source/mk/csrc.mk
@@ -1,6 +1,6 @@
 objs := $(foreach n, $(mods), $(OBJPATH)/$(n))
 
-CFLAGS += $(EXTRA_CFLAGS) -I$(SRC)/lib/uapi/include
+CFLAGS += $(EXTRA_CFLAGS) $(INCLUDES) -I$(SRC)/lib/uapi/include
 LDFLAGS += $(EXTRA_LDFLAGS)
 
 ifeq ($(KERNEL_DEPEND), Y)
-- 
Gitee


From 0a83d759a7fb9a0b2fc9c03fde3ac18251260f67 Mon Sep 17 00:00:00 2001
From: "muya.zj" <muya.zj@alibaba-inc.com>
Date: Wed, 19 Jul 2023 10:33:50 +0800
Subject: [PATCH 14/16] sysak.service: change MemoryLimit

Signed-off-by: muya.zj <muya.zj@alibaba-inc.com>
---
 rpm/sysak.service | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rpm/sysak.service b/rpm/sysak.service
index 4379ea4c..447ecd20 100644
--- a/rpm/sysak.service
+++ b/rpm/sysak.service
@@ -7,7 +7,7 @@
   Restart=always
   RestartSec=10
   CPUQuota=30%
-  MemoryLimit=60M
+  MemoryLimit=300M
   ExecStart=/usr/local/sysak/.sysak_components/tools/dist/app/beeQ/run.sh
   ExecStop=kill -9 $(pidof unity-mon)
   ExecReload=kill -1 $(pidof unity-mon)
-- 
Gitee


From e5345e5d0595afd305a97e8a0d761c14b8593fea Mon Sep 17 00:00:00 2001
From: = <wodemia@linux.alibaba.com>
Date: Tue, 11 Jul 2023 16:33:11 +0800
Subject: [PATCH 15/16] unity: imc_latency rename macro

---
 .../unity/collector/plugin/imc_latency/imc_latency.h        | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h
index f42a0752..e3cc818b 100644
--- a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h
+++ b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h
@@ -1,5 +1,5 @@
-#ifndef UNITY_SAMPLE_H
-#define UNITY_SAMPLE_H
+#ifndef UNITY_IMC_LATENCY_H
+#define UNITY_IMC_LATENCY_H
 
 #include <linux/types.h>
 #include <stdbool.h>
@@ -123,4 +123,4 @@ struct topology_ent {
     int64_t socket_id;
 };
 
-#endif  // UNITY_SAMPLE_H
+#endif
-- 
Gitee


From 697c1b8fa57a200d1c4098222eeeebb605454523 Mon Sep 17 00:00:00 2001
From: liaozhaoyan <zhaoyan.liao@linux.alibaba.com>
Date: Wed, 19 Jul 2023 23:25:11 +0800
Subject: [PATCH 16/16] change metrics description title, use sysom_ instead.

---
 source/tools/monitor/unity/etc/base.yaml  | 70 +++++++++++------------
 source/tools/monitor/unity/etc/group.yaml | 16 +++---
 source/tools/monitor/unity/etc/k8s.yaml   | 18 +++---
 3 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/source/tools/monitor/unity/etc/base.yaml b/source/tools/monitor/unity/etc/base.yaml
index c784e642..9350c344 100644
--- a/source/tools/monitor/unity/etc/base.yaml
+++ b/source/tools/monitor/unity/etc/base.yaml
@@ -49,92 +49,92 @@ plugins:
 
 metrics:
   -
-    title: sysak_proc_cpu_total
+    title: sysom_proc_cpu_total
     from: cpu_total
     head: mode
     help: "cpu usage info for total."
     type: "gauge"
-  - title: sysak_proc_cpus
+  - title: sysom_proc_cpus
     from: cpus
     head: mode
     help: "cpu usage info for per-cpu."
     type: "gauge"
-  - title: sysak_proc_sirq
+  - title: sysom_proc_sirq
     from: sirq
     head: type
     help: "system soft irq times."
     type: "gauge"
-  - title: sysak_proc_stat_counters
+  - title: sysom_proc_stat_counters
     from: stat_counters
     head: counter
     help: "system state counter."
     type: "gauge"
-  - title: sysak_proc_meminfo
+  - title: sysom_proc_meminfo
     from: meminfo
     head: value
     help: "meminfo from /proc/meminfo."
     type: "gauge"
-  - title: sysak_proc_vmstat
+  - title: sysom_proc_vmstat
     from: vmstat
     head: value
     help: "vmstat info from /proc/vmstat."
     type: "gauge"
-  - title: sysak_proc_self_statm
+  - title: sysom_proc_self_statm
     from: self_statm
     head: value
     help: "statm info from /proc/self/statm."
     type: "gauge"
-  - title: sysak_proc_networks
+  - title: sysom_proc_networks
     from: networks
     head: counter
     help: "networks info from /proc/net/dev."
     type: "gauge"
-  - title: sysak_proc_disks
+  - title: sysom_proc_disks
     from: disks
     head: counter
     help: "disk info from /proc/diskstats."
     type: "gauge"
-  - title: sysak_proc_pkt_status
+  - title: sysom_proc_pkt_status
     from: pkt_status
     head: counter
     help: "net status info from /proc/net/snmp and /proc/net/status."
     type: "gauge"
-  - title: sysak_fs_stat
+  - title: sysom_fs_stat
     from: fs_stat
     head: counter
     help: "file system information."
     type: "gauge"
-  - title: sysak_sock_stat
+  - title: sysom_sock_stat
     from: sock_stat
     head: value
     help: "sock stat counters from /proc/net/sockstat"
     type: "gauge"
-  - title: sysak_proc_schedstat
+  - title: sysom_proc_schedstat
     from: proc_schedstat
     head: value
     help: "schedule state of percpu."
     type: "gauge"
-  - title: sysak_proc_loadavg
+  - title: sysom_proc_loadavg
     from: proc_loadavg
     head: value
     help: "loadavg of system from /proc/loadavg"
     type: "gauge"
-  - title: sysak_proc_buddyinfo
+  - title: sysom_proc_buddyinfo
     from: buddyinfo
     head: value
     help: "buddyinfo of system from /proc/buddyinfo"
     type: "gauge"
-  - title: sysak_IOMonIndForDisksIO
+  - title: sysom_IOMonIndForDisksIO
     from: IOMonIndForDisksIO
     head: value
     help: "Disk IO indicators and abnormal events"
     type: "gauge"
-  - title: sysak_IOMonIndForSystemIO
+  - title: sysom_IOMonIndForSystemIO
     from: IOMonIndForSystemIO
     head: value
     help: "System indicators and abnormal events about IO"
     type: "gauge"
-  - title: sysak_IOMonDiagLog
+  - title: sysom_IOMonDiagLog
     from: IOMonDiagLog
     head: value
     help: "Diagnose log for IO exception"
@@ -144,87 +144,87 @@ metrics:
     head: value
     help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling"
     type: "gauge"
-  - title: sysak_cpu_dist
+  - title: sysom_cpu_dist
     from: cpu_dist
     head: value
     help: "task cpu sched dist."
     type: "gauge"
-  - title: sysak_net_health_hist
+  - title: sysom_net_health_hist
     from: net_health_hist
     head: value
     help: "net_health_hist"
     type: "gauge"
-  - title: sysak_net_health_count
+  - title: sysom_net_health_count
     from: net_health_count
     head: value
     help: "net_health_count"
     type: "gauge"
-  - title: sysak_net_retrans_count
+  - title: sysom_net_retrans_count
     from: net_retrans_count
     head: value
     help: "net_retrans_count"
     type: "gauge"
-  - title: sysak_gpuinfo
+  - title: sysom_gpuinfo
     from: gpuinfo
     head: value
     help: "gpuinfo of system from nvidia-smi"
     type: "gauge"
-  - title: sysak_uname
+  - title: sysom_uname
     from: uname
     head: value
     help: "uname info"
     type: "gauge"
-  - title: sysak_uptime
+  - title: sysom_uptime
     from: uptime
     head: value
     help: "uptime from /proc/uptime"
     type: "gauge"
-  - title: sysak_system_release
+  - title: sysom_system_release
     from: system_release
     head: value
     help: "system_release from /etc/os-release"
     type: "gauge"
-  - title: sysak_cgroups
+  - title: sysom_cgroups
     from: cgroups
     head: value
     help: "cgroup number."
     type: "gauge"
-  - title: sysak_per_sirqs
+  - title: sysom_per_sirqs
     from: per_sirqs
     head: value
     help: "per_sirqs."
     type: "gauge"
-  - title: sysak_softnets
+  - title: sysom_softnets
     from: softnets
     head: value
     help: "cgroup number."
     type: "gauge"
-  - title: sysak_interrupts
+  - title: sysom_interrupts
     from: interrupts
     head: value
     help: "interrupts."
     type: "gauge"
-  - title: sysak_net_ip_count
+  - title: sysom_net_ip_count
     from: net_ip_count
     head: value
     help: "net snmp net_ip_count"
     type: "gauge"
-  - title: sysak_net_icmp_count
+  - title: sysom_net_icmp_count
     from: net_icmp_count
     head: value
     help: "net snmp net_icmp_count"
     type: "gauge"
-  - title: sysak_net_udp_count
+  - title: sysom_net_udp_count
     from: net_udp_count
     head: value
     help: "net snmp net_udp_count"
     type: "gauge"
-  - title: sysak_net_tcp_count
+  - title: sysom_net_tcp_count
     from: net_tcp_count
     head: value
     help: "net snmp net_tcp_count"
     type: "gauge"
-  - title: sysak_net_tcp_ext_count
+  - title: sysom_net_tcp_ext_count
     from: net_tcp_ext_count
     head: value
     help: "net stat net_tcp_ext_count"
diff --git a/source/tools/monitor/unity/etc/group.yaml b/source/tools/monitor/unity/etc/group.yaml
index 3208ecc2..6e3d8302 100644
--- a/source/tools/monitor/unity/etc/group.yaml
+++ b/source/tools/monitor/unity/etc/group.yaml
@@ -62,33 +62,33 @@ plugins:
     description: "summary retrans out put."
 
 metrics:
-  - title: sysak_proc_pkt_status
+  - title: sysom_proc_pkt_status
     from: pkt_status
     head: counter
     help: "net status info from /proc/net/snmp and /proc/net/status."
     type: "gauge"
-  - title: sysak_net_health_hist
+  - title: sysom_net_health_hist
     from: net_health_hist
     head: value
     help: "net_health_hist"
     type: "gauge"
-  - title: sysak_net_health_count
+  - title: sysom_net_health_count
     from: net_health_count
     head: value
     help: "net_health_count"
     type: "gauge"
-  - title: sysak_net_retrans_count
+  - title: sysom_net_retrans_count
     from: net_retrans_count
     head: value
     help: "net_retrans_count"
     type: "gauge"
-  - title: sysak_virtout_dist
+  - title: sysom_virtout_dist
     from: virtout_dist
     head: value
-    help: "sysak_virtout_dist"
+    help: "sysom_virtout_dist"
     type: "gauge"
-  - title: sysak_retrans
+  - title: sysom_retrans
     from: retrans
     head: value
-    help: "sysak_retrans"
+    help: "sysom_retrans"
     type: "gauge"
\ No newline at end of file
diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml
index c4f63b2e..bddd9864 100644
--- a/source/tools/monitor/unity/etc/k8s.yaml
+++ b/source/tools/monitor/unity/etc/k8s.yaml
@@ -281,7 +281,7 @@ metrics:
     head: value
     help: "sysom_cg_memory_util"
     type: "gauge"
-  - title: sysak_cg_mem_glob_drcm_latency
+  - title: sysom_cg_mem_glob_drcm_latency
     from: cgGlbDrcmLatency
     head: value
     help: "sysom global memory latency"
@@ -296,22 +296,22 @@ metrics:
     head: value
     help: "sysom_cg_memmcmp_latency"
     type: "gauge"
-  - title: sysak_cg_mem_dcmp_latency
+  - title: sysom_cg_mem_dcmp_latency
     from: cg_memdcmp_latency
     head: value
-    help: "sysak_cg_mem_dcmp_latency"
+    help: "sysom_cg_mem_dcmp_latency"
     type: "gauge"
-  - title: sysak_cg_cpuacct_wait_latency
+  - title: sysom_cg_cpuacct_wait_latency
     from: cg_wait_latency
     head: value
-    help: "sysak_cg_cpuacct_wait_latency"
+    help: "sysom_cg_cpuacct_wait_latency"
     type: "gauge"
-  - title: sysak_cg_cpuacct_proc_stat
+  - title: sysom_cg_cpuacct_proc_stat
     from: cg_cpuacct_proc_stat
     head: value
-    help: "sysak_cg_cpuacct_proc_stat"
+    help: "sysom_cg_cpuacct_proc_stat"
     type: "gauge"
-  - title: sysak_cg_cpu_quota
+  - title: sysom_cg_cpu_quota
     from: cgCpuQuota
     head: value
     help: "quota_us,peroid_us and quota/period"
@@ -326,7 +326,7 @@ metrics:
     head: value
     help: "cpuacct/cpuacct.stat"
     type: "gauge"
-  - title: sysak_cg_pmu_events
+  - title: sysom_cg_pmu_events
     from: pmu_cg_events
     head: value
     help: "pmu events of cgroups"
-- 
Gitee