From 3615213715d544a1c511650f9c171468b1820272 Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Thu, 13 Jul 2023 17:30:29 +0800 Subject: [PATCH 01/16] unity/etc: Add some metrics for k8s.yaml Signed-off-by: Hailong Liu --- source/tools/monitor/unity/etc/k8s.yaml | 36 ++++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml index 19e6059e..8b9f1690 100644 --- a/source/tools/monitor/unity/etc/k8s.yaml +++ b/source/tools/monitor/unity/etc/k8s.yaml @@ -25,7 +25,10 @@ outline: container: mode: "pods" - luaPlugin: ["cg_cpu_cfs_quota","cg_mem_drcm_glob_latency","cg_memory_util","cg_cpu_stat_sample", "cg_cpuacct_stat","cg_memory_drcm_latency", "cg_memory_fail_cnt","cg_memory_dcmp_latency"] + #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat + luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency", + "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat", + "cg_cpu_stat", "cg_pmu_events", "cg_cpu_cfs_quota", "cg_mem_drcm_glob_latency"] directCgPath: - "/" - "/kubepods.slice" @@ -277,13 +280,12 @@ metrics: head: value help: "sysom_cg_memory_util" type: "gauge" - - title: sysom_cg_memgdrcm_latency + - title: sysak_cg_mem_glob_drcm_latency from: cgGlbDrcmLatency head: value help: "sysom global memory latency" type: "gauge" - - - title: sysom_cg_memdrcm_latency + - title: sysom_cg_mem_drcm_latency from: cg_memdrcm_latency head: value help: "sysom_cg_memdrcm_latency" @@ -293,6 +295,26 @@ metrics: head: value help: "sysom_cg_memmcmp_latency" type: "gauge" + - title: sysak_cg_mem_dcmp_latency + from: cg_memdcmp_latency + head: value + help: "sysak_cg_mem_dcmp_latency" + type: "gauge" + - title: sysak_cg_cpuacct_wait_latency + from: cg_wait_latency + head: value + help: "sysak_cg_cpuacct_wait_latency" + type: "gauge" + - title: sysak_cg_cpuacct_proc_stat + from: cg_cpuacct_proc_stat + head: value + help: 
"sysak_cg_cpuacct_proc_stat" + type: "gauge" + - title: sysak_cg_cpu_quota + from: cgCpuQuota + head: value + help: "quota_us,peroid_us and quota/period" + type: "gauge" - title: sysom_cg_cpu_stat from: cg_cpu_stat head: value @@ -303,8 +325,8 @@ metrics: head: value help: "cpuacct/cpuacct.stat" type: "gauge" - - title: sysom_cg_cfs_quota - from: cgCpuQuota + - title: sysak_cg_pmu_events + from: pmu_cg_events head: value - help: "cfs quota" + help: "pmu events of cgroups" type: "gauge" -- Gitee From 7eab513442e296a34a87ba2da38a28e9b42194ac Mon Sep 17 00:00:00 2001 From: zhilan Date: Fri, 14 Jul 2023 16:06:49 +0800 Subject: [PATCH 02/16] podmem: pagealloc: fix bugs in tx --- source/tools/detect/mem/podmem/entry/pagealloc.py | 8 ++++++-- source/tools/detect/mem/podmem/entry/podmem.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/tools/detect/mem/podmem/entry/pagealloc.py b/source/tools/detect/mem/podmem/entry/pagealloc.py index 4b6066b2..5e0ef2b1 100644 --- a/source/tools/detect/mem/podmem/entry/pagealloc.py +++ b/source/tools/detect/mem/podmem/entry/pagealloc.py @@ -197,7 +197,8 @@ def get_info(meminfo, result,cid): meminfo["podinfo"][podname] = {} meminfo["podinfo"][podname]["podname"] = podname meminfo["podinfo"][podname]["podns"] = podns - meminfo["podinfo"][podname]["mem"] = 0 + meminfo["podinfo"][podname]["rxmem"] = 0 + meminfo["podinfo"][podname]["txmem"] = 0 return podname def pagemem_scan(meminfo, ns): @@ -241,7 +242,10 @@ def pagemem_check(meminfo,ns): pid = info[1] task_pid = task+"-"+pid rx = int(line_list[2]) - tx = int(line_list[3]) + if line.find("LISTEN") >= 0: + tx = 0 + else: + tx = int(line_list[3]) rx_mem += rx tx_mem += tx if task_pid not in memTask.keys(): diff --git a/source/tools/detect/mem/podmem/entry/podmem.py b/source/tools/detect/mem/podmem/entry/podmem.py index 1ce46eae..8b875a20 100644 --- a/source/tools/detect/mem/podmem/entry/podmem.py +++ b/source/tools/detect/mem/podmem/entry/podmem.py @@ -412,7 +412,7 
@@ def handle_args(podinfo, argv): print("-j: dump result to json file (sysak podmem -s -j ./test.json)") print("-r: set sample rate ,default set to 1 (sysak podmem -s -r 2)") print("-t: output filecache top ,default for top 10 (sysak podmem -s -t 20)") - print("-m: analysis pod recv-Q memory") + print("-m: analysis pod recv-Q and send-Q memory") sys.exit(2) elif opt == '-r': cmdline['rate'] = int(arg) -- Gitee From b9cc16b27f1ba21affef51da2a137462bce24dca Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Fri, 14 Jul 2023 17:31:05 +0800 Subject: [PATCH 03/16] unity/podMan: Provide an alternative way for pods or cgroup Signed-off-by: Hailong Liu --- source/tools/monitor/unity/collector/loop.lua | 16 ++++++++++------ source/tools/monitor/unity/etc/k8s.yaml | 1 + 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/source/tools/monitor/unity/collector/loop.lua b/source/tools/monitor/unity/collector/loop.lua index 47e793de..68b19a55 100644 --- a/source/tools/monitor/unity/collector/loop.lua +++ b/source/tools/monitor/unity/collector/loop.lua @@ -16,7 +16,7 @@ local CguardDaemon = require("collector.guard.guardDaemon") local CguardSelfStat = require("collector.guard.guardSelfStat") local CpostPlugin = require("collector.postPlugin.postPlugin") local CforkRun = require("collector.execEngine.forkRun") ----local CpodFilter = require("collector.podMan.podFilter") +local CpodFilter = require("collector.podMan.podFilter") local CpodsAll = require("collector.podMan.podsAll") local Cloop = class("loop") @@ -52,11 +52,15 @@ function Cloop:loadLuaPlugin(res, proc_path, procffi) end end if res.container then - ---self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path) - ---self._names[c] = "podFilter" - self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path) - self._names[c] = "podMon" - + if res.container.mode == "cgroup" then + --print("mods1="..res.container.mode) + self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path) + 
self._names[c] = "podFilter" + else + --print("mods2="..res.container.mode) + self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path) + self._names[c] = "podMon" + end end print("add " .. system:keyCount(self._procs) .. " lua plugin.") end diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml index 8b9f1690..458b90f4 100644 --- a/source/tools/monitor/unity/etc/k8s.yaml +++ b/source/tools/monitor/unity/etc/k8s.yaml @@ -25,6 +25,7 @@ outline: container: mode: "pods" + #mode:"cgroup" #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency", "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat", -- Gitee From 09283528f3861e144ed757295fcb1edb74b6c53d Mon Sep 17 00:00:00 2001 From: yinbinbin Date: Fri, 14 Jul 2023 21:17:26 +0800 Subject: [PATCH 04/16] oomkill: print cmdline Signed-off-by: yinbinbin --- source/tools/monitor/oomkill/kill.c | 4 +++ source/tools/monitor/oomkill/meminfo.c | 41 ++++++++++++++++++++++++++ source/tools/monitor/oomkill/meminfo.h | 1 + 3 files changed, 46 insertions(+) diff --git a/source/tools/monitor/oomkill/kill.c b/source/tools/monitor/oomkill/kill.c index 7000dc48..11e88db6 100644 --- a/source/tools/monitor/oomkill/kill.c +++ b/source/tools/monitor/oomkill/kill.c @@ -427,6 +427,8 @@ procinfo_t find_largest_process(const poll_loop_args_t* args) */ void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victim) { + char cmdline[512] = {0}; + if (victim->pid <= 0) { warn("Could not find a process to kill. Sleeping 1 second.\n"); if (args->notify) { @@ -444,10 +446,12 @@ void kill_process(const poll_loop_args_t* args, int sig, const procinfo_t* victi } else if (sig == 0) { sig_name = "0 (no-op signal)"; } + get_cmdline(victim->pid, cmdline, sizeof(cmdline)); // sig == 0 is used as a self-test during startup. Don't notify the user. 
if (sig != 0 || enable_debug) { warn("sending %s to process %d uid %d \"%s\": badness %ld, VmRSS %lld MiB adj:%d\n", sig_name, victim->pid, victim->uid, victim->name, victim->badness, victim->VmRSSkiB / 1024, victim->oom_score_adj); + warn("process cmdline:%s\n", cmdline); } int res = kill_wait(args, victim->pid, sig); diff --git a/source/tools/monitor/oomkill/meminfo.c b/source/tools/monitor/oomkill/meminfo.c index 4f02e886..18bab6b1 100644 --- a/source/tools/monitor/oomkill/meminfo.c +++ b/source/tools/monitor/oomkill/meminfo.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include "globals.h" #include "meminfo.h" @@ -253,6 +256,44 @@ int get_comm(int pid, char* out, size_t outlen) return 0; } +int get_cmdline(int pid, char *out, int out_len) { + char cmdline_file[128] = {0}; + int fd; + int i; + ssize_t nread, total = 0; + + snprintf(cmdline_file, sizeof(cmdline_file), "%s/%d/cmdline", procdir_path, pid); + + fd = open(cmdline_file, O_RDONLY); + if (fd == -1) { + printf("Failed to open %s\n", cmdline_file); + return -1; + } + + while ((nread = read(fd, out + total, out_len - total - 1)) > 0) { + total += nread; + } + + close(fd); + + // /proc/PID/cmdline separates arguments with '\0'; turn them into spaces + for (i = 0; i < total; i++) { + if (out[i] == '\0') { + out[i] = ' '; + } + } + + out[total] = '\0'; + + if (nread == -1) { + printf("Failed to read %s\n", cmdline_file); + return -1; + } + + return 0; +} + + // Get the effective uid (EUID) of `pid`. // Returns the uid (>= 0) or -errno on error. 
int get_uid(int pid) diff --git a/source/tools/monitor/oomkill/meminfo.h b/source/tools/monitor/oomkill/meminfo.h index 22df1ef5..4572b402 100644 --- a/source/tools/monitor/oomkill/meminfo.h +++ b/source/tools/monitor/oomkill/meminfo.h @@ -61,6 +61,7 @@ int get_oom_score(int pid); int get_oom_score_adj(const int pid, int* out); long long get_vm_rss_kib(int pid); int get_comm(int pid, char* out, size_t outlen); +int get_cmdline(int pid, char *out, int out_len); int get_uid(int pid); #endif -- Gitee From 9def63213b5c636582af83b130a3a9a2de1e1e63 Mon Sep 17 00:00:00 2001 From: stephanie <1165643831@qq.com> Date: Mon, 17 Jul 2023 06:13:20 +0000 Subject: [PATCH 05/16] !708 remove /.idea & **/.DS_Store, modify .gitignore * modify .gitignore * remove /.idea & **/.DS_Store, modify .gitignore --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 2 ++ .idea/.gitignore | 8 -------- .idea/misc.xml | 18 ------------------ .idea/vcs.xml | 6 ------ source/.DS_Store | Bin 6148 -> 0 bytes source/tools/.DS_Store | Bin 6148 -> 0 bytes source/tools/monitor/.DS_Store | Bin 6148 -> 0 bytes 8 files changed, 2 insertions(+), 32 deletions(-) delete mode 100644 .DS_Store delete mode 100644 .idea/.gitignore delete mode 100644 .idea/misc.xml delete mode 100644 .idea/vcs.xml delete mode 100644 source/.DS_Store delete mode 100644 source/tools/.DS_Store delete mode 100644 source/tools/monitor/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index b639f68ef81e47f5d1ca0c3e13497b3d586b058e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z<-5O({YT3Oz1(Em&hwikDF93mDOZN=;1AV9b^zHHT8jSzpK}@p+ut z-5i1iZz6UEcE8#A+0A^A{b7vp$s#yptj8F$pdoTp)(D!{y0%O(B3E-fu+T5ExgSe^ z%|w6Egx}s`8Ix?mQda-|4}Ts_a7bRh;5~3i4klX7h31l&mizLWYuB#nZ$Lh>_{pHdgjR(TEPgVo5 z9NWWz7#$t2Rvl|^|KRju_8h+?@=X)TfpaB01}k_6rB>6cnHGaRI!A_05L!e z5CfaXfH@1S?&g(F6%zx*zz+=I{ve>0lQo&M{bO)ai_?m0=#Ua`kxOYIU#+70$S$k$Pf)7^pMQ(8C^{|L5?_ 
ztbOFyQ)omC5Ci{=0p6Ut(+L!1&em_`;aMx7-9tmcxDpi*&})|f7~nqAS5EC0s6(7% Xu+)gNpk1W{(nUZKLLD*i3k-Y#X)jDR diff --git a/.gitignore b/.gitignore index 40a844cf..fa19e01d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ *.ko *.ko.cmd *.idea +**/.idea/ +**/.DS_Store Module.symvers modules.builtin modules.order diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 13566b81..00000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 53624c9e..00000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1ddf..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/source/.DS_Store b/source/.DS_Store deleted file mode 100644 index fe62bc3d37d7e43ee3c765a76137456f136a5618..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z>*NO({YS3Oz1(Em&hwikA@U3mDOZN=-=7V9b^#wTDv3SzpK}@p+ut z-9U@Mqlld;yWi~m>}Edb{xHV4H;;}Ovl(L+G(?U{g`m08RWreeT+NY%i)9 zZ<_Gin=EG%ehFLu{*Q2)#97|&fAU(r(cEcSEvsYQ`%iM==RrQtykK^Vqbn(su+)R_ zI-V@X_TITn^B_(qGnEiW69~DxiPK0fJULIJOy&C8VYRLH*zPWu&S2ORj&r)|iRIAg z_r&1%WVLEr`v-?-m*eN;C6#ZQNDhoE*)>?gJ1A=vz525>k?A8?%ZxITkQg8ahyh|? 
zI~g$Nfz{d0@~LWKfEf6J0o)%1G(^{6p;2ue(BbtN{S8DE(D5ySC=9v=3yt6b;W`yi zr*iYe;5r@b!o;}-3ynIRakVncV^%I7FI=q-cA>%U3u0s{KRB>)Dvk93t&`vvL{=Nc?D U;w)%a>40<*P=ruN4EzEEUjs->9{>OV diff --git a/source/tools/.DS_Store b/source/tools/.DS_Store deleted file mode 100644 index 12272494a9145ab77e6821d51a35aa9e656937e1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z-O8O({YT3Oz1(Em&hwikDF93mDOZN=-=7V45vSY7V84v%Zi|;`2DO zy8){?coMNQu=!@^XLsj=>vQO)mK6u{zc@(BuzyHo_^~TnA(`s66>&|_Yxtn>}A{}|j4fZaj zjQmpf{Ht)9PddA2GS0j(o+c_G45kordmYAs%tvw&2dT<+wZm#ztx2c5T-t+SPuTX! zswb91yWbOoqvO@8W$o=BoL)?xqFF3oG?g4!SF&TUgjZ0$RQ2p8aU|mh@RV6)G$AoS z3=jjvz-BRE&H<~vS!GkT!~iky0|sz^5YP}EgM~)5b-)L&&**O;qJWNX2}EJgF<58> z4+z((fI5|%CkEH)U>7FNF<5BS>5QwHVH`7a`FP=KcCZT-&bXtIzQh1Au+BhTcRP6g zpTjRx`N&^Sp&l_n4E!?&cw_92M^KbGTfdcuXRUyC4-Ez53RFNqU%LcA2ltV#a%#Uo a9pW5=g+?3&?J6CRE&_@W`Va%Zz`zH${7m8i diff --git a/source/tools/monitor/.DS_Store b/source/tools/monitor/.DS_Store deleted file mode 100644 index 54403a1caa441bc5047ab36847f73aef327fef34..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}T>S5Z<+|O({YS3Oz1(Em&hwikDF93mDOZN=-=7V9b^#u}CT8tS{t~_&m<+ zZlJ~BQN+%`?l(I>yO|HNKa4Rx%)>**Y{r-c4UwZ#A!x32ZJA(1uI6|R=IJzuWl%BA z-!$R3H(1UR7O{-2e*Z@>jiWT{^*;Glt=`yfT1~5M-Fc64?qz2*c_=ytv!aFD{6}@<~B$mkoSj&tul8_i628aP- zU^5vo=YiGU%<`#fVt^RkQQN(8lxs9DbR~ zNB(*Wjfeqa;GZ$T>mzS8grdyZ`mH=XYX!7>Xebz0paKH=+9d!6xQ}#{Q~L$#5a${! 
VG~z61SLuLs5m1CsM-2P|17G=VOEmxh -- Gitee From fd6570e91da885aecf16722c50f1ab55e5c7a1b1 Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Tue, 18 Jul 2023 11:14:19 +0800 Subject: [PATCH 06/16] proc_stat: Add cpu_util threshold warning events Signed-off-by: Hailong Liu --- .../monitor/unity/collector/proc_stat.lua | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/source/tools/monitor/unity/collector/proc_stat.lua b/source/tools/monitor/unity/collector/proc_stat.lua index e5834d75..dbdec60b 100644 --- a/source/tools/monitor/unity/collector/proc_stat.lua +++ b/source/tools/monitor/unity/collector/proc_stat.lua @@ -14,6 +14,9 @@ function CprocStat:_init_(proto, pffi, mnt, pFile) CvProc._init_(self, proto, pffi, mnt,pFile or "proc/stat") self._funs = self:setupTable() self._cpuArr = {} + self._total_warn = 0 + self._sys_warn = 0 + self._user_warn = 0 end function CprocStat:_cpuHead() @@ -23,6 +26,12 @@ end function CprocStat:_procCpu(now, last) if last then + local user_thresh = 40 + local sys_thresh = 25 + local total_thresh = 55 + local user_util = 0 + local sys_util = 0 + local total_util = 0 local vs = {} local sum = 0 local index = self:_cpuHead() @@ -37,10 +46,39 @@ function CprocStat:_procCpu(now, last) local total = tonumber(sum) for i = 1, #vs do local v = tonumber(vs[i]) + + --accumulate user/sys/total utilisation (percent) for the warn counters + if index[i] == "user" or index[i] == "nice" then + user_util = user_util + v*100.0/total + end + if index[i] == "sys" or index[i] == "softirq" then + sys_util = sys_util + v*100.0/total + end + if index[i] == "idle" then + total_util = 100 - (v*100.0/total) + end + local cell = {name=index[i], value=tonumber(v * 100.0 / total)} table.insert(res, cell) end table.insert(res, {name="total", value=total}) + --bump a cumulative warn counter whenever its utilisation exceeds the threshold + if user_util > user_thresh then + self._user_warn = self._user_warn + 1 + end + local cell0 = {name="usr_warn", value=self._user_warn} + table.insert(res, cell0) + if sys_util > sys_thresh then + self._sys_warn = self._sys_warn + 1 + end + 
local cell1 = {name="sys_warn", value=self._sys_warn} + table.insert(res, cell1) + if total_util > total_thresh then + self._total_warn = self._total_warn + 1 + end + local cell2 = {name="total_warn", value=self._total_warn} + table.insert(res, cell2) + return res end end -- Gitee From eb25e8b37ce77e01ef701d8dc1ff60653447026a Mon Sep 17 00:00:00 2001 From: = Date: Mon, 10 Jul 2023 11:56:12 +0800 Subject: [PATCH 07/16] imc_latency: init in sysak tools --- source/tools/detect/mem/imc_latency/Makefile | 4 + source/tools/detect/mem/imc_latency/README.md | 46 ++ .../detect/mem/imc_latency/imc_latency.c | 770 ++++++++++++++++++ .../detect/mem/imc_latency/imc_latency.h | 122 +++ 4 files changed, 942 insertions(+) create mode 100644 source/tools/detect/mem/imc_latency/Makefile create mode 100644 source/tools/detect/mem/imc_latency/README.md create mode 100644 source/tools/detect/mem/imc_latency/imc_latency.c create mode 100644 source/tools/detect/mem/imc_latency/imc_latency.h diff --git a/source/tools/detect/mem/imc_latency/Makefile b/source/tools/detect/mem/imc_latency/Makefile new file mode 100644 index 00000000..282f8bfd --- /dev/null +++ b/source/tools/detect/mem/imc_latency/Makefile @@ -0,0 +1,4 @@ +target := imc_latency +mods := imc_latency.o + +include $(SRC)/mk/csrc.mk diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md new file mode 100644 index 00000000..8b089785 --- /dev/null +++ b/source/tools/detect/mem/imc_latency/README.md @@ -0,0 +1,46 @@ +# imc_latency + +基于PMU事件的DDR内存访问延迟,用于检查微架构层级是否存在内存竞争。 + +## 原理与限制 + +基于IMC的PMU组件实现,需要硬件支持。目前仅支持Intel的Ice Lake(ICX)、Sky Lake(SKX)、Cascade Lake以及Sapphire Rapids(SPR)等架构。 + +| micro-architecture | code | cpu-model number | +| ------------------ | ---- | ---------------- | +| Sapphire Rapids | SPR | 143 | +| Ice Lake | ICX | 106/108 | +| Cascade Lake | | 85 | +| Sky Lake-X | SKX | 85 | + +可以通过`lscpu`的`Model`字段检查硬件是否支持。 + +```bash +Architecture: x86_64 +CPU op-mode(s): 
32-bit, 64-bit +Byte Order: Little Endian +CPU(s): 128 +On-line CPU(s) list: 0-127 +Thread(s) per core: 2 +Core(s) per socket: 32 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +BIOS Vendor ID: Intel(R) Corporation +CPU family: 6 +Model: 106 +Model name: Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz +BIOS Model name: Intel(R) Xeon(R) Platinum 8369B CPU @ 2.90GHz +Stepping: 6 +CPU MHz: 3500.000 +CPU max MHz: 3500.0000 +CPU min MHz: 800.0000 +BogoMIPS: 5800.00 +Virtualization: VT-x +L1d cache: 48K +L1i cache: 32K +L2 cache: 1280K +L3 cache: 49152K +NUMA node0 CPU(s): 0-31,64-95 +NUMA node1 CPU(s): 32-63,96-127 +``` diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c new file mode 100644 index 00000000..e3ef2687 --- /dev/null +++ b/source/tools/detect/mem/imc_latency/imc_latency.c @@ -0,0 +1,770 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "imc_latency.h" + +// #define DEBUG + +struct Env { + uint32_t max_cpuid; + int32_t cpu_model; + int32_t cpu_family; + int32_t cpu_stepping; + bool vm; + int64_t nr_cpu; + int64_t nr_socket; + int64_t nr_core; + int64_t nr_channel; + int64_t* socket_ref_core; +} env = {.vm = false}; + +typedef struct event { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + uint64_t dram_speed; +} event; + +typedef struct channel_record { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + double read_latency; + double write_latency; +} channel_record; + +typedef struct socket_record { + channel_record* channel_record_arr; + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + double read_latency; + double write_latency; + uint64_t dram_clock; +} socket_record; + +typedef struct record { + socket_record* socket_record_arr; +} record; + +record before, after; + +time_t before_ts = 0, after_ts = 0; +imc_pmu* pmus = 0; + 
+int64_t read_sys_file(char* path, bool slient) { + int64_t val = -1; + FILE* fp = fopen(path, "r"); + if (!fp) { + if (!slient) fprintf(stderr, "Failed open sys-file: %s\n", path); + return -1; + } + + fscanf(fp, "%ld\n", &val); +#ifdef DEBUG + fprintf(stderr, "read from=%s val=%ld\n", path, val); +#endif + if (fp) fclose(fp); + return val; +} + +static int write_reg(imc_event* ev, uint64_t val) { + int err = 0; + if (ev->fd >= 0) { + close(ev->fd); + ev->fd = -1; + } + + ev->attr.config = ev->fixed ? 0xff : val; + + if ((ev->fd = syscall(SYS_perf_event_open, &ev->attr, -1, ev->core_id, -1, + 0)) <= 0) { + fprintf(stderr, "Linux Perf: Error on programming PMU %d:%s\n", + ev->pmu_id, strerror(errno)); + fprintf(stderr, "config: 0x%llx config1: 0x%llx config2: 0x%llx\n", + ev->attr.config, ev->attr.config1, ev->attr.config2); + if (errno == EMFILE) fprintf(stderr, "%s", ULIMIT_RECOMMENDATION); + + return -1; + } + return err; +} + +static uint64_t read_reg(imc_event* ev) { + uint64_t result = 0; + if (ev->fd >= 0) { + int status = read(ev->fd, &result, sizeof(result)); + if (status != sizeof(result)) { + fprintf( + stderr, + "PCM Error: failed to read from Linux perf handle %d PMU %d\n", + ev->fd, ev->pmu_id); + } + } + return result; +} + +static bool is_cpu_online(int cpu_id) { + char path[BUF_SIZE]; + uint64_t val; + bool res = false; + + snprintf(path, BUF_SIZE, "/sys/devices/system/cpu/cpu%d/online", cpu_id); + + FILE* fp = fopen(path, "r"); + if (!fp) { + fprintf(stderr, "Failed open %s.\n", path); + goto cleanup; + } + + val = read_sys_file(path, true); + if (val == UINT64_MAX) { + goto cleanup; + } + + res = (val == 1); + +cleanup: + if (fp) fclose(fp); + return res; +} + +int64_t read_core_id(int cpu_id) { + char core_id_path[BUF_SIZE]; + int64_t val = -1; + + snprintf(core_id_path, BUF_SIZE, + "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu_id); + val = read_sys_file(core_id_path, true); + return val; +} + +int64_t read_physical_package_id(int cpu_id) { + char 
pkg_id_path[BUF_SIZE]; + + int64_t val = -1; + + snprintf(pkg_id_path, BUF_SIZE, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", + cpu_id); + val = read_sys_file(pkg_id_path, true); + + return val; +} + +static int get_topology(int id, struct topology_ent* ent) { + int err = 0; + ent->core_id = read_core_id(id); + ent->socket_id = read_physical_package_id(id); + if (ent->core_id == -1 || ent->socket_id == -1) { +#ifdef DEBUG + fprintf(stderr, "get coreid=%d socket_id=%d\n", ent->core_id, + ent->socket_id); +#endif + err = -1; + } + + return err; +} + +static int discovery_topology() { + int err = 0, i = 0; + struct topology_ent* topo = 0; + + env.nr_cpu = sysconf(_SC_NPROCESSORS_CONF); + + if (env.nr_cpu < 0) { + fprintf(stderr, "Failed get nr_cpu.\n"); + err = -1; + goto cleanup; + } + + topo = calloc(env.nr_cpu, sizeof(struct topology_ent)); + if (!topo) { + fprintf(stderr, "Faile calloc topology memory.\n"); + err = -1; + goto cleanup; + } + + int64_t max_skt_id = 0; + int64_t max_core_id = 0; + for (i = 0; i < env.nr_cpu; i++) { + err = get_topology(i, topo + i); + if (err) { + fprintf(stderr, "Failed get topology cpuid:%d\n", i); + goto cleanup; + } + + max_skt_id = + max_skt_id > topo[i].socket_id ? max_skt_id : topo[i].socket_id; + max_core_id = + max_core_id > topo[i].core_id ? max_core_id : topo[i].core_id; + } + + env.nr_socket = max_skt_id + 1; + env.nr_core = max_core_id + 1; + + env.socket_ref_core = calloc(env.nr_socket, sizeof(int64_t)); + if (!env.socket_ref_core) { + fprintf(stderr, "Failed calloc socket_ref_core. 
nr_socket=%d\n", + env.nr_socket); + err = -1; + goto cleanup; + } + + for (i = 0; i < env.nr_cpu; i++) { + if (!is_cpu_online(i)) continue; + env.socket_ref_core[topo[i].socket_id] = i; + } + +cleanup: + if (topo) free(topo); + topo = 0; + return err; +} + +static void cpuid_1(int leaf, CPUID_INFO* info) { + __asm__ __volatile__("cpuid" + : "=a"(info->reg.eax), "=b"(info->reg.ebx), + "=c"(info->reg.ecx), "=d"(info->reg.edx) + : "a"(leaf)); +} + +void cpuid_2(const unsigned leaf, const unsigned subleaf, CPUID_INFO* info) { + __asm__ __volatile__("cpuid" + : "=a"(info->reg.eax), "=b"(info->reg.ebx), + "=c"(info->reg.ecx), "=d"(info->reg.edx) + : "a"(leaf), "c"(subleaf)); +} + +static bool detect_model() { + char buffer[1024]; + union { + char cbuf[16]; + int ibuf[16 / sizeof(int)]; + } buf; + + CPUID_INFO cpuinfo; + + bzero(buffer, 1024); + bzero(buf.cbuf, 16); + cpuid_1(0, &cpuinfo); + + buf.ibuf[0] = cpuinfo.array[1]; + buf.ibuf[1] = cpuinfo.array[3]; + buf.ibuf[2] = cpuinfo.array[2]; + + if (strncmp(buf.cbuf, "GenuineIntel", 4 * 3) != 0) { + fprintf(stderr, "Not intel cpu.\n"); + return false; + } + + env.max_cpuid = cpuinfo.array[0]; + + cpuid_1(1, &cpuinfo); + env.cpu_family = (((cpuinfo.array[0]) >> 8) & 0xf) | + ((cpuinfo.array[0] & 0xf00000) >> 16); + env.cpu_model = (((cpuinfo.array[0]) & 0xf0) >> 4) | + ((cpuinfo.array[0] & 0xf0000) >> 12); + env.cpu_stepping = cpuinfo.array[0] & 0x0f; + + if (cpuinfo.reg.ecx & (1UL << 31UL)) { + env.vm = true; + fprintf(stderr, + "WARN: Detected a hypervisor/virtualization technology. 
Some " + "metrics might not be available due to configuration or " + "availability of virtual hardware features.\n"); + } + + if (env.cpu_family != 6) { + fprintf(stderr, "Unsupport CPU Family: %d\n", env.cpu_family); + return false; + } + + return true; +} + +bool is_model_support() { + switch (env.cpu_model) { + case NEHALEM: + env.cpu_model = NEHALEM_EP; + break; + case ATOM_2: + env.cpu_model = ATOM; + break; + case HASWELL_ULT: + case HASWELL_2: + env.cpu_model = HASWELL; + break; + case BROADWELL_XEON_E3: + env.cpu_model = BROADWELL; + break; + case ICX_D: + env.cpu_model = ICX; + break; + case CML_1: + env.cpu_model = CML; + break; + case ICL_1: + env.cpu_model = ICL; + break; + case TGL_1: + env.cpu_model = TGL; + break; + case ADL_1: + env.cpu_model = ADL; + break; + case RPL_1: + case RPL_2: + case RPL_3: + env.cpu_model = RPL; + break; + } + + return (env.cpu_model == ICX || env.cpu_model == SPR || + env.cpu_model == SKX); +} + +uint32_t* get_ddr_latency_metric_config() { + uint32_t* cfgs = 0; + cfgs = calloc(4, sizeof(uint32_t)); + if (!cfgs) { + fprintf(stderr, "Failed calloc cfgs memory.\n"); + return NULL; + } + + if (ICX == env.cpu_model || SPR == env.cpu_model) { + cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy pch 0 + cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert.pch 0 + cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x82) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy pch 0 + cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM WPQ Insert.pch 0 + } else { + cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy + cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ Insert + cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy + cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ 
Insert + } + + return cfgs; +} + +struct perf_event_attr init_perf_event_attr(bool group) { + struct perf_event_attr e; + bzero(&e, sizeof(struct perf_event_attr)); + e.type = -1; // must be set up later + e.size = sizeof(e); + e.config = -1; // must be set up later + e.read_format = group ? PERF_FORMAT_GROUP + : 0; /* PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_GROUP ; */ + return e; +} + +void init_imc_event(imc_event* event, int pmu_id, int core_id, bool fixed) { + struct perf_event_attr attr = init_perf_event_attr(false); + attr.type = pmu_id; + event->attr = attr; + event->fixed = fixed; + event->pmu_id = pmu_id; + event->core_id = core_id; + event->fd = -1; +} + +void init_imc_reggrp(imc_reg_group* grp, int socket_id, int pmu_id) { + int i = 0; +#ifdef DEBUG + + fprintf(stderr, "Init imc reg group: socketid=%d pmuid=%d\n", socket_id, + pmu_id); +#endif + init_imc_event(&grp->fixed_ev, pmu_id, env.socket_ref_core[socket_id], + true); + + for (i = 0; i < GENERAL_REG_NUM; i++) { + init_imc_event(&grp->general_ev[i], pmu_id, + env.socket_ref_core[socket_id], false); + } +} + +imc_pmu* init_imc_pmus(int64_t* pmu_ids, int64_t size) { + int skt_id = 0; + int pmu_id = 0; + + imc_pmu* pmus = calloc(env.nr_socket, sizeof(imc_pmu)); + + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + pmus[skt_id].reg_groups = calloc(size, sizeof(imc_reg_group)); + pmus[skt_id].socket_id = skt_id; + pmus[skt_id].nr_grp = size; + + for (pmu_id = 0; pmu_id < size; pmu_id++) { + init_imc_reggrp(&pmus[skt_id].reg_groups[pmu_id], skt_id, + pmu_ids[pmu_id]); + } + } + + return pmus; +} + +void program_imc(uint32_t* cfgs, imc_pmu* pmus) { + int skt_id = 0; + int pmu_id = 0; + int idx = 0; + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + imc_pmu* pmu = pmus + skt_id; + for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { + imc_reg_group* grp = pmu->reg_groups + pmu_id; + /* enabel and reset fixed counter(DRAM clock) */ + 
write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN); + write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN + + MC_CH_PCI_PMON_FIXED_CTL_RST); + for (idx = 0; idx < GENERAL_REG_NUM; idx++) { + uint64_t event = cfgs[idx]; + if (SPR == env.cpu_model) { + write_reg(&grp->general_ev[idx], event); + } else { + write_reg(&grp->general_ev[idx], MC_CH_PCI_PMON_CTL_EN); + write_reg(&grp->general_ev[idx], + MC_CH_PCI_PMON_CTL_EN | event); + } + } + } + } +} + +socket_record* alloc_socket_record() { + int skt_id = 0; + socket_record* rec = calloc(env.nr_socket, sizeof(socket_record)); + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + rec[skt_id].channel_record_arr = + calloc(env.nr_channel, sizeof(channel_record)); + } + return rec; +} + +void free_socket_record(socket_record* rec) { + int skt_id = 0; + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + free(rec[skt_id].channel_record_arr); + } + free(rec); +} + +void init_data() { + before.socket_record_arr = alloc_socket_record(); + after.socket_record_arr = alloc_socket_record(); +} + +void free_data() { + free_socket_record(before.socket_record_arr); + free_socket_record(after.socket_record_arr); +} + +int64_t get_perf_pmuid(int num) { + int64_t id = -1; + char imc_path[BUF_SIZE]; + + if (num != -1) { + snprintf(imc_path, BUF_SIZE, + "/sys/bus/event_source/devices/uncore_imc_%d/type", num); + } else { + snprintf(imc_path, BUF_SIZE, + "/sys/bus/event_source/devices/uncore_imc/type"); + } + + id = read_sys_file(imc_path, true); + + return id; +} + +static int64_t* enumerate_imc_PMUs() { + int64_t* pmu_ids = 0; + int idx = 0, i = 0; + + pmu_ids = calloc(MAX_IMC_ID + 2, sizeof(int64_t)); + + if (!pmu_ids) { + fprintf(stderr, "Failed calloc pmu ids memory.\n"); + return NULL; + } + + for (i = -1; i <= MAX_IMC_ID; ++i) { + int64_t pmu_id = get_perf_pmuid(i); + if (pmu_id != -1) pmu_ids[idx++] = pmu_id; + } + + env.nr_channel = idx; + +cleanup: + + if (env.nr_channel == 0 && pmu_ids) { + free(pmu_ids); + pmu_ids = 0; 
+ } + + return pmu_ids; +} + +static int init_env() { + int err = 0; + int64_t* pmu_ids = 0; + uint32_t* cfgs = 0; + + // check model + if (!detect_model()) { + fprintf(stderr, "Failed detect model.\n"); + err = -1; + goto cleanup; + } + + if (!is_model_support()) { + fprintf(stderr, "Unsupport model.\n"); + err = -1; + goto cleanup; + } + + // get core/socket info + err = discovery_topology(); + if (err) { + fprintf(stderr, "Failed discovery topology.\n"); + err = -1; + goto cleanup; + } + + // get all imc-pmu id + pmu_ids = enumerate_imc_PMUs(); + if (!pmu_ids) { + fprintf(stderr, "Failed enumerate imc pmus.\n"); + err = -1; + goto cleanup; + } + + cfgs = get_ddr_latency_metric_config(); + if (!cfgs) { + fprintf(stderr, "Failed enumerate imc pmus.\n"); + err = -1; + goto cleanup; + } + + // init pmu + pmus = init_imc_pmus(pmu_ids, env.nr_channel); + + // write pmu register + program_imc(cfgs, pmus); + + // init data + init_data(); + + fprintf(stderr, "nr_socket=%d nr_core=%d nr_cpu=%d nr_channel=%d \n", + env.nr_socket, env.nr_core, env.nr_cpu, env.nr_channel); + int i = 0; + for (i = 0; i < env.nr_socket; i++) { + fprintf(stderr, "socket%d-ref cpu=%d\n", i, env.socket_ref_core[i]); + } + +cleanup: + + if (pmu_ids) { + free(pmu_ids); + pmu_ids = 0; + } + + if (cfgs) { + free(cfgs); + cfgs = 0; + } + + return err; +} + +void read_imc() { + int skt_id = 0, pmu_id = 0, counter_id = 0; + after_ts = time(0); + + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + imc_pmu* pmu = pmus + skt_id; + socket_record* socket_ev = &after.socket_record_arr[skt_id]; + for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { + imc_reg_group* grp = pmu->reg_groups + pmu_id; + channel_record* channel_ev = + &after.socket_record_arr[skt_id].channel_record_arr[pmu_id]; + /* enabel and reset fixed counter(DRAM clock) */ + if (pmu_id == 0) { + socket_ev->dram_clock = read_reg(&grp->fixed_ev); + if (env.cpu_model == ICX || env.cpu_model == SNOWRIDGE) { + socket_ev->dram_clock = 2 * 
socket_ev->dram_clock; + } + } + + channel_ev->rpq_occ = read_reg(&grp->general_ev[RPQ_OCC]); + channel_ev->rpq_ins = read_reg(&grp->general_ev[RPQ_INS]); + channel_ev->wpq_occ = read_reg(&grp->general_ev[WPQ_OCC]); + channel_ev->wpq_ins = read_reg(&grp->general_ev[WPQ_INS]); + + socket_ev->rpq_occ += channel_ev->rpq_occ; + socket_ev->rpq_ins += channel_ev->rpq_ins; + socket_ev->wpq_occ += channel_ev->wpq_occ; + socket_ev->wpq_ins += channel_ev->wpq_ins; + } + } + + if (before_ts) { + double delta = after_ts - before_ts; + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + socket_record* before_socket_ev = &before.socket_record_arr[skt_id]; + socket_record* after_socket_ev = &after.socket_record_arr[skt_id]; + imc_pmu* pmu = pmus + skt_id; + double dram_speed = + (after_socket_ev->dram_clock - before_socket_ev->dram_clock) / + (delta * (double)1e9); + + for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { + channel_record* before_channel_ev = + &before_socket_ev->channel_record_arr[pmu_id]; + channel_record* after_channel_ev = + &after_socket_ev->channel_record_arr[pmu_id]; + + if (after_channel_ev->rpq_ins - before_channel_ev->rpq_ins > + 0) { + after_channel_ev->read_latency = + (after_channel_ev->rpq_occ - + before_channel_ev->rpq_occ) / + (after_channel_ev->rpq_ins - + before_channel_ev->rpq_ins) / + dram_speed; + } + + if (after_channel_ev->wpq_ins - before_channel_ev->wpq_ins > + 0) { + after_channel_ev->write_latency = + (after_channel_ev->wpq_occ - + before_channel_ev->wpq_occ) / + (after_channel_ev->wpq_ins - + before_channel_ev->wpq_ins) / + dram_speed; + } + } + + if (after_socket_ev->rpq_ins - before_socket_ev->rpq_ins > 0) { + after_socket_ev->read_latency = + (after_socket_ev->rpq_occ - before_socket_ev->rpq_occ) / + (after_socket_ev->rpq_ins - before_socket_ev->rpq_ins) / + dram_speed; + } + + if (after_socket_ev->wpq_ins - before_socket_ev->wpq_ins > 0) { + after_socket_ev->write_latency = + (after_socket_ev->wpq_occ - before_socket_ev->wpq_occ) / 
+ (after_socket_ev->wpq_ins - before_socket_ev->wpq_ins) / + dram_speed; + } + } + } +} + +#ifdef DEBUG +void print_socket(socket_record* rec) { + fprintf(stderr, + "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld dram_clocks=%ld " + "r_latency=%lf w_latency=%lf\n", + rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins, + rec->dram_clock, rec->read_latency, rec->write_latency); +} + +void print_channel(channel_record* rec) { + fprintf(stderr, + "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld r_latency = % lf " + "w_latency = % lf\n ", + rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins, + rec->read_latency, rec->write_latency); +} + +void print_record(record* rec) { + int i = 0; + int j = 0; + for (i = 0; i < env.nr_socket; i++) { + print_socket(&rec->socket_record_arr[i]); + for (j = 0; j < env.nr_channel; j++) { + print_channel(&rec->socket_record_arr[i].channel_record_arr[j]); + } + } +} +#endif + +static int collect_data() { + int32_t socket_id = 0, channel_id = 0, line_num = 0; + read_imc(); + + fprintf(stderr, "[SOCKET_LEVEL]\n"); + fprintf(stderr, "%16s %16s %16s\n", "socket", "rlat", "wlat"); + + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + char socket_name[32]; + snprintf(socket_name, 32, "%d", socket_id); + socket_record* srec = &after.socket_record_arr[socket_id]; + fprintf(stderr, "%16s %16lf %16lf\n", socket_name, srec->read_latency, + srec->write_latency); + } + + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + fprintf(stderr, "[CHANNEL_LEVEL-SOCKET%d]\n", socket_id); + char socket_name[32]; + snprintf(socket_name, 32, "%d", socket_id); + + socket_record* srec = &after.socket_record_arr[socket_id]; + + fprintf(stderr, "%16s %16s %16s\n", "channel", "rlat", "wlat"); + for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { + channel_record* crec = &srec->channel_record_arr[channel_id]; + char channel_name[32]; + snprintf(channel_name, 32, "%d", channel_id); + fprintf(stderr, "%16s %16s %16s\n", 
channel_name, + crec->read_latency, crec->write_latency); + } + } + + /* swap data */ + socket_record* tmp = before.socket_record_arr; + before.socket_record_arr = after.socket_record_arr; + after.socket_record_arr = tmp; + + /* clear after data */ + free_socket_record(after.socket_record_arr); + after.socket_record_arr = alloc_socket_record(); + + /* reset before timestamp */ + before_ts = after_ts; + + return 0; +} + +static clean_env(void) { free_data(); } + +int main() { + init_env(); + while (1) { + sleep(1); + collect_data(); + } + + clean_env(); +} diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h new file mode 100644 index 00000000..6c8ccde5 --- /dev/null +++ b/source/tools/detect/mem/imc_latency/imc_latency.h @@ -0,0 +1,122 @@ +#ifndef UNITY_SAMPLE_H +#define UNITY_SAMPLE_H + +#include +#include +#include +#include + +#define ULIMIT_RECOMMENDATION \ + ("try executing 'ulimit -n 1000000' to increase the limit on the number " \ + "of open files.\n") + +typedef union CPUID_INFO { + int array[4]; + struct { + unsigned int eax, ebx, ecx, edx; + } reg; +} CPUID_INFO; + +enum INTEL_CPU_MODEL { + NEHALEM_EP = 26, + NEHALEM = 30, + ATOM = 28, + ATOM_2 = 53, + CENTERTON = 54, + BAYTRAIL = 55, + AVOTON = 77, + CHERRYTRAIL = 76, + APOLLO_LAKE = 92, + GEMINI_LAKE = 122, + DENVERTON = 95, + SNOWRIDGE = 134, + CLARKDALE = 37, + WESTMERE_EP = 44, + NEHALEM_EX = 46, + WESTMERE_EX = 47, + SANDY_BRIDGE = 42, + JAKETOWN = 45, + IVY_BRIDGE = 58, + HASWELL = 60, + HASWELL_ULT = 69, + HASWELL_2 = 70, + IVYTOWN = 62, + HASWELLX = 63, + BROADWELL = 61, + BROADWELL_XEON_E3 = 71, + BDX_DE = 86, + SKL_UY = 78, + KBL = 158, + KBL_1 = 142, + CML = 166, + CML_1 = 165, + ICL = 126, + ICL_1 = 125, + RKL = 167, + TGL = 140, + TGL_1 = 141, + ADL = 151, + ADL_1 = 154, + RPL = 0xb7, + RPL_1 = 0xba, + RPL_2 = 0xbf, + RPL_3 = 0xbe, + BDX = 79, + KNL = 87, + SKL = 94, + SKX = 85, + ICX_D = 108, + ICX = 106, + SPR = 143, + 
END_OF_MODEL_LIST = 0x0ffff +}; + +#define MC_CH_PCI_PMON_CTL_EVENT(x) (x << 0) +#define MC_CH_PCI_PMON_CTL_UMASK(x) (x << 8) +#define MC_CH_PCI_PMON_CTL_RST (1 << 17) +#define MC_CH_PCI_PMON_CTL_EDGE_DET (1 << 18) +#define MC_CH_PCI_PMON_CTL_EN (1 << 22) +#define MC_CH_PCI_PMON_CTL_INVERT (1 << 23) +#define MC_CH_PCI_PMON_CTL_THRESH(x) (x << 24UL) +#define MC_CH_PCI_PMON_FIXED_CTL_RST (1 << 19) +#define MC_CH_PCI_PMON_FIXED_CTL_EN (1 << 22) +#define UNC_PMON_UNIT_CTL_FRZ_EN (1 << 16) +#define UNC_PMON_UNIT_CTL_RSV ((1 << 16) + (1 << 17)) + +#define RPQ_OCC 0 +#define RPQ_INS 1 +#define WPQ_OCC 2 +#define WPQ_INS 3 + +#define BUF_SIZE 1024 +#define MAX_IMC_ID 100 +#define GENERAL_REG_NUM 4 +#define FIXED_REG_NUM 1 + +typedef struct imc_event_t { + struct perf_event_attr attr; + int fd; + int core_id; + int pmu_id; + bool fixed; +} imc_event; + +typedef struct imc_reg_group_t { + imc_event general_ev[GENERAL_REG_NUM]; + imc_event fixed_ev; + int pmu_id; +} imc_reg_group; + +typedef struct imc_pmu_t { + imc_reg_group* reg_groups; + int socket_id; + int nr_grp; +} imc_pmu; + +struct topology_ent { + int64_t cpu_id; + int64_t core_id; + int64_t socket_id; +}; + +#endif // UNITY_SAMPLE_H -- Gitee From 21bfa8ad77496c27ff072749e75555b01659c353 Mon Sep 17 00:00:00 2001 From: ZouTao Date: Mon, 10 Jul 2023 15:18:53 +0800 Subject: [PATCH 08/16] imc_latency: add options --- source/tools/detect/mem/imc_latency/README.md | 48 ++- .../detect/mem/imc_latency/imc_latency.c | 275 ++++++++++++++---- .../detect/mem/imc_latency/imc_latency.h | 34 +++ 3 files changed, 294 insertions(+), 63 deletions(-) diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md index 8b089785..fd669fb1 100644 --- a/source/tools/detect/mem/imc_latency/README.md +++ b/source/tools/detect/mem/imc_latency/README.md @@ -2,16 +2,52 @@ 基于PMU事件的DDR内存访问延迟,用于检查微架构层级是否存在内存竞争。 +## Usgae + +### 使用用例 + +```bash +Sample: + +imc_latency -f /dev/stdout #输出日子到控制台 
+imc_latency -d 15 i 20 # 每15秒采集一次 输出20次采集结果 +``` + +### 结果说明 + +一次的采集结果如下,输出的的指标类型由read_latency(rlat)和write_latency(wlat),指标的level有socket和channel两种级别。 + +- SOCKET_LEVEL: socket层级的读写内存延迟,通过对channel级的指标求平均得到。 +- CHANNEL_LEVEL:channel级别的读写内存延迟 + +```bash +[TIME-STAMP] 2023-07-10 07:06:17 +[SOCKET_LEVEL] + 0 1 + rlat 13.75 14.37 + wlat 39.37 37.49 +[CHANNEL_LEVEL]-[SOCKET-0] + 0 1 2 3 4 5 6 7 8 9 10 11 + rlat 14.37 13.75 0.00 13.75 13.75 0.00 13.12 13.75 0.00 14.37 13.75 0.00 + wlat 40.62 39.99 0.00 39.37 38.74 0.00 40.62 39.37 0.00 39.99 38.74 0.00 +[CHANNEL_LEVEL]-[SOCKET-1] + 0 1 2 3 4 5 6 7 8 9 10 11 + rlat 15.00 13.75 0.00 13.75 13.75 0.00 13.75 14.37 0.00 14.37 14.37 0.00 + wlat 38.12 37.49 0.00 36.87 36.87 0.00 38.12 38.12 0.00 38.12 37.49 0.00 +``` + ## 原理与限制 基于IMC的PMU组件实现,需要硬件支持。目前仅支持Intel的Ice Lake(ICX)、Sky Lake(SKX)、Cascade Lake以及Sapphire Rapids(SPR)等架构。 -| micro-architecture | code | cpu-model number | -| ------------------ | ---- | ---------------- | -| Sapphire Rapids | SPR | 143 | -| Ice Lake | ICX | 106/108 | -| Cascade Lake | | 106 | -| Sky Lake-X | SKX | 85 | +| 微架构 | 代号 | cpu-model编号 | +| --------------- | ---- | ------------- | +| Sapphire Rapids | SPR | 143 | +| Ice Lake | ICX | 106/108 | +| Cascade Lake | | 106 | +| Sky Lake-X | SKX | 85 | + +### 检查是否支持 可以通过`lscpu`的`Model`字段检查硬件是否支持。 diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c index e3ef2687..204d1343 100644 --- a/source/tools/detect/mem/imc_latency/imc_latency.c +++ b/source/tools/detect/mem/imc_latency/imc_latency.c @@ -1,11 +1,15 @@ #include #include #include -#include #include +#include +#include +#include #include #include -#include +#include +#include +#include #include #include @@ -13,6 +17,27 @@ // #define DEBUG +const char* argp_program_version = "imc_latency 0.1"; +const char argp_program_doc[] = + "Detect the memory latency based on IMC PMU.\n" + "\n" + + "USAGE: imc_latency [--help] [-d DELAY] [-i ITERATION] [-f 
LOGFILE]\n" + "\n" + + "EXAMPLES:\n" + " imc_latency # run forever, display the memory latency.\n" + " imc_latency -f foo.log # log to foo.log.\n"; + +static const struct argp_option opts[] = { + {"delay", 'd', "DELAY", 0, "Sample peroid, default is 3 seconds"}, + {"iter", 'i', "ITERATION", 0, "Output times, default run forever"}, + {"logfile", 'f', "LOGFILE", 0, + "Logfile for result, default /var/log/sysak/imc_latency/imc_latency.log"}, + {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, + {}, +}; + struct Env { uint32_t max_cpuid; int32_t cpu_model; @@ -24,44 +49,99 @@ struct Env { int64_t nr_core; int64_t nr_channel; int64_t* socket_ref_core; -} env = {.vm = false}; - -typedef struct event { - uint64_t rpq_occ; - uint64_t rpq_ins; - uint64_t wpq_occ; - uint64_t wpq_ins; - uint64_t dram_speed; -} event; - -typedef struct channel_record { - uint64_t rpq_occ; - uint64_t rpq_ins; - uint64_t wpq_occ; - uint64_t wpq_ins; - double read_latency; - double write_latency; -} channel_record; - -typedef struct socket_record { - channel_record* channel_record_arr; - uint64_t rpq_occ; - uint64_t rpq_ins; - uint64_t wpq_occ; - uint64_t wpq_ins; - double read_latency; - double write_latency; - uint64_t dram_clock; -} socket_record; - -typedef struct record { - socket_record* socket_record_arr; -} record; + int64_t nr_iter; + int64_t delay; +} env = {.vm = false, .nr_iter = INT64_MAX, .delay = DEFAUlT_PEROID}; record before, after; - time_t before_ts = 0, after_ts = 0; imc_pmu* pmus = 0; +char log_dir[FILE_PATH_LEN] = "/var/log/sysak/imc_latency"; +char default_log_path[FILE_PATH_LEN] = + "/var/log/sysak/imc_latency/imc_latency.log"; +char* log_path = 0; +FILE* log_fp = 0; +bool exiting = false; + +static void sigint_handler(int signo) { exiting = 1; } + +/* if out of range or no number found return nonzero */ +static int parse_long(const char* str, long* retval) { + int err = 0; + char* endptr; + errno = 0; + long val = strtol(str, &endptr, 10); + + /* Check for various 
possible errors */ + if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) || + (errno != 0 && val == 0)) { + fprintf(stderr, "Failed parse val.\n"); + err = errno; + return err; + } + + if (endptr == str) return err = -1; + *retval = val; + return err; +} + +static error_t parse_arg(int key, char* arg, struct argp_state* state) { + int err = 0; + long val; + switch (key) { + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case 'd': + err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse delay.\n"); + argp_usage(state); + } + + env.delay = val; + break; + case 'i': + err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse iteration-num.\n"); + argp_usage(state); + } + env.nr_iter = val; + env.nr_iter++; + break; + case 'f': + log_path = arg; + break; + case ARGP_KEY_ARG: + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +static int prepare_directory(char* path) { + int ret; + + ret = mkdir(path, 0777); + if (ret < 0 && errno != EEXIST) + return errno; + else + return 0; +} + +static FILE* open_logfile() { + FILE* f = 0; + if (!log_path) { + log_path = default_log_path; + } + + f = fopen(log_path, "w"); + + return f; +} int64_t read_sys_file(char* path, bool slient) { int64_t val; @@ -573,13 +653,14 @@ static int init_env() { // init data init_data(); - +#ifdef DEBUG fprintf(stderr, "nr_socket=%d nr_core=%d nr_cpu=%d nr_channel=%d \n", env.nr_socket, env.nr_core, env.nr_cpu, env.nr_channel); int i = 0; for (i = 0; i < env.nr_socket; i++) { fprintf(stderr, "socket%d-ref cpu=%d\n", i, env.socket_ref_core[i]); } +#endif cleanup: @@ -710,38 +791,77 @@ void print_record(record* rec) { } #endif -static int collect_data() { - int32_t socket_id = 0, channel_id = 0, line_num = 0; - read_imc(); +static char* ts2str(time_t ts, char* buf, int size) { + struct tm* t = gmtime(&ts); + strftime(buf, size, "%Y-%m-%d %H:%M:%S", t); + return buf; +} + +static void 
output_ts(FILE* dest) { + char stime_str[BUF_SIZE] = {0}; + time_t now = time(0); + fprintf(dest, "[TIME-STAMP] %s\n", ts2str(now, stime_str, BUF_SIZE)); +} + +static void output_socket_lat(FILE* dest) { + int32_t socket_id = 0; - fprintf(stderr, "[SOCKET_LEVEL]\n"); - fprintf(stderr, "%16s %16s %16s\n", "socket", "rlat", "wlat"); + fprintf(dest, "%s\n", "[SOCKET_LEVEL]"); + // fprintf(dest, "%8s%16s%16s\n", "socket", "rlat", "wlat"); + fprintf(dest, "%8s", ""); for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { - char socket_name[32]; - snprintf(socket_name, 32, "%d", socket_id); + fprintf(dest, "%8d", socket_id); + } + fprintf(dest, "\n"); + + fprintf(dest, "%8s", "rlat"); + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { socket_record* srec = &after.socket_record_arr[socket_id]; - fprintf(stderr, "%16s %16lf %16lf\n", socket_name, srec->read_latency, - srec->write_latency); + fprintf(dest, "%8.2lf", srec->read_latency); } + fprintf(dest, "\n"); + fprintf(dest, "%8s", "wlat"); + for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { + socket_record* srec = &after.socket_record_arr[socket_id]; + fprintf(dest, "%8.2lf", srec->write_latency); + } + fprintf(dest, "\n"); +} + +static void output_channel_lat(FILE* dest) { + int32_t socket_id = 0, channel_id = 0; for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { - fprintf(stderr, "[CHANNEL_LEVEL-SOCKET%d]\n", socket_id); char socket_name[32]; snprintf(socket_name, 32, "%d", socket_id); socket_record* srec = &after.socket_record_arr[socket_id]; - fprintf(stderr, "%16s %16s %16s\n", "channel", "rlat", "wlat"); + fprintf(dest, "[CHANNEL_LEVEL]-[SOCKET-%d]\n", socket_id); + fprintf(dest, "%8s", ""); + for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { + fprintf(dest, "%8d", channel_id); + } + fprintf(dest, "\n"); + + fprintf(dest, "%8s", "rlat"); + for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { + channel_record* crec = 
&srec->channel_record_arr[channel_id]; + fprintf(dest, "%8.2lf", crec->read_latency); + } + fprintf(dest, "\n"); + + fprintf(dest, "%8s", "wlat"); for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { channel_record* crec = &srec->channel_record_arr[channel_id]; - char channel_name[32]; - snprintf(channel_name, 32, "%d", channel_id); - fprintf(stderr, "%16s %16s %16s\n", channel_name, - crec->read_latency, crec->write_latency); + fprintf(dest, "%8.2lf", crec->write_latency); } + fprintf(dest, "\n"); } +} +void swap_record() { /* swap data */ socket_record* tmp = before.socket_record_arr; before.socket_record_arr = after.socket_record_arr; @@ -753,17 +873,58 @@ static int collect_data() { /* reset before timestamp */ before_ts = after_ts; +} - return 0; +static void output_split(FILE* dest) { fprintf(dest, "\n"); } +static void collect_data() { + int32_t socket_id = 0, channel_id = 0, line_num = 0; + read_imc(); + + if (before_ts) { + output_ts(log_fp); + output_socket_lat(log_fp); + output_channel_lat(log_fp); + output_split(log_fp); + fflush(log_fp); + } + + swap_record(); } -static clean_env(void) { free_data(); } +static void clean_env(void) { free_data(); } + +int main(int argc, char** argv) { + int err; + /* parse args */ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + + err = argp_parse(&argp, argc, argv, 0, 0, 0); + if (err) { + fprintf(stderr, "Failed parse args.\n"); + return -1; + } + + prepare_directory(log_dir); + log_fp = open_logfile(); + if (!log_fp) { + fprintf(stderr, "Failed open log file.\n"); + return -1; + } + + if (signal(SIGINT, sigint_handler) == SIG_ERR) { + fprintf(stderr, "Failed set signal handler.\n"); + return -errno; + } -int main() { init_env(); - while (1) { - sleep(1); + + while (env.nr_iter-- && !exiting) { collect_data(); + sleep(env.delay); } clean_env(); diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h 
b/source/tools/detect/mem/imc_latency/imc_latency.h index 6c8ccde5..1d9fddda 100644 --- a/source/tools/detect/mem/imc_latency/imc_latency.h +++ b/source/tools/detect/mem/imc_latency/imc_latency.h @@ -92,6 +92,8 @@ enum INTEL_CPU_MODEL { #define MAX_IMC_ID 100 #define GENERAL_REG_NUM 4 #define FIXED_REG_NUM 1 +#define FILE_PATH_LEN 256 +#define DEFAUlT_PEROID 3 typedef struct imc_event_t { struct perf_event_attr attr; @@ -119,4 +121,36 @@ struct topology_ent { int64_t socket_id; }; +typedef struct event { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + uint64_t dram_speed; +} event; + +typedef struct channel_record { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + double read_latency; + double write_latency; +} channel_record; + +typedef struct socket_record { + channel_record* channel_record_arr; + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + double read_latency; + double write_latency; + uint64_t dram_clock; +} socket_record; + +typedef struct record { + socket_record* socket_record_arr; +} record; + #endif // UNITY_SAMPLE_H -- Gitee From 4ab6f360ca1bf0ab0ec54b5783b80069ece36d5a Mon Sep 17 00:00:00 2001 From: = Date: Mon, 10 Jul 2023 15:25:59 +0800 Subject: [PATCH 09/16] process unsupportde model --- .../detect/mem/imc_latency/imc_latency.c | 37 +++---------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/source/tools/detect/mem/imc_latency/imc_latency.c b/source/tools/detect/mem/imc_latency/imc_latency.c index 204d1343..e6f9340c 100644 --- a/source/tools/detect/mem/imc_latency/imc_latency.c +++ b/source/tools/detect/mem/imc_latency/imc_latency.c @@ -762,35 +762,6 @@ void read_imc() { } } -#ifdef DEBUG -void print_socket(socket_record* rec) { - fprintf(stderr, - "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld dram_clocks=%ld " - "r_latency=%lf w_latency=%lf\n", - rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins, - rec->dram_clock, 
rec->read_latency, rec->write_latency); -} - -void print_channel(channel_record* rec) { - fprintf(stderr, - "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld r_latency = % lf " - "w_latency = % lf\n ", - rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins, - rec->read_latency, rec->write_latency); -} - -void print_record(record* rec) { - int i = 0; - int j = 0; - for (i = 0; i < env.nr_socket; i++) { - print_socket(&rec->socket_record_arr[i]); - for (j = 0; j < env.nr_channel; j++) { - print_channel(&rec->socket_record_arr[i].channel_record_arr[j]); - } - } -} -#endif - static char* ts2str(time_t ts, char* buf, int size) { struct tm* t = gmtime(&ts); strftime(buf, size, "%Y-%m-%d %H:%M:%S", t); @@ -920,8 +891,12 @@ int main(int argc, char** argv) { return -errno; } - init_env(); - + err = init_env(); + if (err) { + fprintf(stderr, "Init env error.\n"); + return -1; + } + while (env.nr_iter-- && !exiting) { collect_data(); sleep(env.delay); -- Gitee From c2ca38fc4e129bf9f2e4a6932a5c52859a0ca6a2 Mon Sep 17 00:00:00 2001 From: = Date: Mon, 10 Jul 2023 17:45:44 +0800 Subject: [PATCH 10/16] imc_latency: fix readme.md --- source/tools/detect/mem/imc_latency/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tools/detect/mem/imc_latency/README.md b/source/tools/detect/mem/imc_latency/README.md index fd669fb1..90c46acd 100644 --- a/source/tools/detect/mem/imc_latency/README.md +++ b/source/tools/detect/mem/imc_latency/README.md @@ -10,7 +10,7 @@ Sample: imc_latency -f /dev/stdout #输出日子到控制台 -imc_latency -d 15 i 20 # 每15秒采集一次 输出20次采集结果 +imc_latency -d 15 -i 20 # 每15秒采集一次 输出20次采集结果 ``` ### 结果说明 @@ -38,7 +38,7 @@ imc_latency -d 15 i 20 # 每15秒采集一次 输出20次采集结果 ## 原理与限制 -基于IMC的PMU组件实现,需要硬件支持。目前仅支持Intel的Ice Lake(ICX)、Sky Lake(SKX)、Cascade Lake以及Sapphire Rapids(SPR)等架构。 +基于IMC的PMU组件实现,需要硬件支持。目前仅支持Intel的Ice Lake(ICX)、Sky Lake(SKX)、Cascade Lake以及Sapphire Rapids(SPR)等微架构。 | 微架构 | 代号 | cpu-model编号 | | --------------- | ---- | ------------- | @@ 
-49,7 +49,7 @@ imc_latency -d 15 i 20 # 每15秒采集一次 输出20次采集结果 ### 检查是否支持 -可以通过`lscpu`的`Model`字段检查硬件是否支持。 +可以使用`lscpu`命令,通过CPU的`Model`字段查看微架构类型,检查硬件是否支持。 ```bash Architecture: x86_64 -- Gitee From 7c82e5cb8027d3fa67cb10dec9ebb3ec88de35ea Mon Sep 17 00:00:00 2001 From: = Date: Tue, 11 Jul 2023 16:32:36 +0800 Subject: [PATCH 11/16] imc_latency: rename macro --- source/tools/detect/mem/imc_latency/imc_latency.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tools/detect/mem/imc_latency/imc_latency.h b/source/tools/detect/mem/imc_latency/imc_latency.h index 1d9fddda..d2b6387e 100644 --- a/source/tools/detect/mem/imc_latency/imc_latency.h +++ b/source/tools/detect/mem/imc_latency/imc_latency.h @@ -1,5 +1,5 @@ -#ifndef UNITY_SAMPLE_H -#define UNITY_SAMPLE_H +#ifndef IMC_LATENCY_H +#define IMC_LATENCY_H #include #include @@ -153,4 +153,4 @@ typedef struct record { socket_record* socket_record_arr; } record; -#endif // UNITY_SAMPLE_H +#endif -- Gitee From 1e27b878cd5e55b4581aaea2f82b4401dfbdf11b Mon Sep 17 00:00:00 2001 From: jietaoxiao Date: Thu, 13 Jul 2023 09:37:03 +0800 Subject: [PATCH 12/16] memcgoffline: add offline memcg dectect tool to /tools/detect/mem --- source/lib/uapi/Makefile | 3 +- source/lib/uapi/include/kcore_utils.h | 70 +++++ source/lib/uapi/kcore_utils.c | 295 ++++++++++++++++++ source/tools/detect/mem/memcgoffline/Makefile | 7 + .../mem/memcgoffline/include/btfparse.h | 39 +++ .../mem/memcgoffline/include/memcg_iter.h | 36 +++ .../detect/mem/memcgoffline/memcg_iter.c | 291 +++++++++++++++++ .../detect/mem/memcgoffline/memcgoffline.c | 187 +++++++++++ 8 files changed, 927 insertions(+), 1 deletion(-) create mode 100644 source/lib/uapi/include/kcore_utils.h create mode 100644 source/lib/uapi/kcore_utils.c create mode 100644 source/tools/detect/mem/memcgoffline/Makefile create mode 100644 source/tools/detect/mem/memcgoffline/include/btfparse.h create mode 100644 source/tools/detect/mem/memcgoffline/include/memcg_iter.h create 
mode 100644 source/tools/detect/mem/memcgoffline/memcg_iter.c create mode 100644 source/tools/detect/mem/memcgoffline/memcgoffline.c diff --git a/source/lib/uapi/Makefile b/source/lib/uapi/Makefile index b92c5bdc..d267d7a7 100644 --- a/source/lib/uapi/Makefile +++ b/source/lib/uapi/Makefile @@ -1,6 +1,7 @@ SOURCE := $(shell find . -name "*.c") OBJS :=$(patsubst %.c,%.o,$(SOURCE)) STATIC_OBJS := $(addprefix $(OBJPATH)/,$(OBJS)) +LIBS += -L /usr/lib64 -l:libelf.a libsysak: $(OBJ_LIB_PATH)/libsysak.a @@ -10,6 +11,6 @@ $(OBJ_LIB_PATH)/libsysak.a: $(STATIC_OBJS) $(STATIC_OBJS): $(OBJS) $(OBJS): %.o : %.c - gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include + gcc -c -o $(OBJPATH)/$@ $< -I$(SRC)/lib/uapi/include $(LIBS) diff --git a/source/lib/uapi/include/kcore_utils.h b/source/lib/uapi/include/kcore_utils.h new file mode 100644 index 00000000..a877ed9c --- /dev/null +++ b/source/lib/uapi/include/kcore_utils.h @@ -0,0 +1,70 @@ +#ifndef __KCORE_UTILS_H +#define __KCORE_UTLIS_H + +#include +#include +#include + +#define BUFF_MAX 4096 +#define MAX_KCORE_ELF_HEADER_SIZE 32768 + +#ifdef DEBUG +#define LOG_DEBUG(...) fprintf(stderr, __VA_ARGS__) +#else +#define LOG_DEBUG(...) do { } while (0) +#endif /* DEBUG */ + +#define LOG_INFO(...) fprintf(stdout, __VA_ARGS__) +#define LOG_WARN(...) fprintf(stderr, __VA_ARGS__) +#define LOG_ERROR(...) fprintf(stderr, __VA_ARGS__) + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +/* struct to record the kcore elf file data*/ +struct proc_kcore_data { + unsigned int flags; + unsigned int segments; + char *elf_header; + size_t header_size; + Elf64_Phdr *load64; + Elf64_Phdr *notes64; + Elf32_Phdr *load32; + Elf32_Phdr *notes32; + void *vmcoreinfo; + unsigned int size_vmcoreinfo; +}; + + +/** + * lookup_kernel_symbol - look up kernel symbol address from /proc/kallsyms + * + * @symbol_name: kernel symbol name to look up. + * @return: the address of the kernel symbol. 
+ * + */ +uintptr_t lookup_kernel_symbol(const char *symbol_name); + +/* prepare_btf_file - check exist btf file, if not exist, download it */ +char *prepare_btf_file(); + +/* open /proc/kcore and read necessary data to interpret kcore */ +int kcore_init(); + +/* close /proc/kcore and do some cleanup */ +void kcore_uninit(); + +/** + * kcore_readmem - read data of certain kernel address from kcore + * + * @kvaddr: kernel address to read. + * @buf: buf for readed data. + * @size: size of the data to read. + * @return: size of the data beeing read if success. + * + * Note: must call after kcore_init() + */ +ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size); + + +#endif \ No newline at end of file diff --git a/source/lib/uapi/kcore_utils.c b/source/lib/uapi/kcore_utils.c new file mode 100644 index 00000000..e58b45c2 --- /dev/null +++ b/source/lib/uapi/kcore_utils.c @@ -0,0 +1,295 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kcore_utils.h" + +#define LEN (128) + +static struct proc_kcore_data proc_kcore_data = { 0 }; +static struct proc_kcore_data *pkd = &proc_kcore_data; + +static int kcore_fd = 0; + +/* + * Routines of kcore, i.e., /proc/kcore + */ +uintptr_t lookup_kernel_symbol(const char *symbol_name) +{ + const char *kallsyms_file = "/proc/kallsyms"; + FILE *fp; + char line[BUFF_MAX]; + char *pos; + uintptr_t addr = -1UL; + + fp = fopen(kallsyms_file, "r"); + if (fp == NULL) { + perror("fopen: /proc/kallsyms"); + return -1; + } + + while (fgets(line, BUFF_MAX, fp)) { + if ((pos = strstr(line, symbol_name)) == NULL) + continue; + + /* Remove trailing newline */ + line[strcspn(line, "\n")] = '\0'; + + /* Exact match */ + if (pos == line || !isspace(*(pos - 1))) + continue; + if (!strcmp(pos, symbol_name)) { + addr = strtoul(line, NULL, 16); + break; + } + } + + if (addr == -1UL) + LOG_ERROR("failed to lookup symbol: %s\n", symbol_name); + + fclose(fp); + return addr; +} + +static int 
kcore_elf_init() +{ + Elf64_Ehdr *elf64; + Elf64_Phdr *load64; + Elf64_Phdr *notes64; + char eheader[MAX_KCORE_ELF_HEADER_SIZE]; + size_t load_size, notes_size; + + if (read(kcore_fd, eheader, MAX_KCORE_ELF_HEADER_SIZE) != + MAX_KCORE_ELF_HEADER_SIZE) { + perror("read: /proc/kcore ELF header"); + return -1; + } + + elf64 = (Elf64_Ehdr *)&eheader[0]; + notes64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr)]; + load64 = (Elf64_Phdr *)&eheader[sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr)]; + + pkd->segments = elf64->e_phnum - 1; + + notes_size = load_size = 0; + if (notes64->p_type == PT_NOTE) + notes_size = notes64->p_offset + notes64->p_filesz; + if (notes64->p_type == PT_LOAD) + load_size = (unsigned long)(load64+(elf64->e_phnum)) - + (unsigned long)elf64; + + pkd->header_size = MAX(notes_size, load_size); + if (!pkd->header_size) + pkd->header_size = MAX_KCORE_ELF_HEADER_SIZE; + + if ((pkd->elf_header = (char *)malloc(pkd->header_size)) == NULL) { + perror("malloc: /proc/kcore ELF header"); + return -1; + } + + memcpy(&pkd->elf_header[0], &eheader[0], pkd->header_size); + pkd->notes64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr)]; + pkd->load64 = (Elf64_Phdr *)&pkd->elf_header[sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr)]; + + return 0; +} + +int kcore_init() +{ + if ((kcore_fd = open("/proc/kcore", O_RDONLY)) < 0) { + perror("open: /proc/kcore"); + return -1; + } + + if (kcore_elf_init()) + goto failed; + + return 0; + +failed: + close(kcore_fd); + return -1; +} + +void kcore_uninit(void) +{ + if (pkd->elf_header) + free(pkd->elf_header); + if (kcore_fd > 0) + close(kcore_fd); +} + +/* + * We may accidentally access invalid pfns on some kernels + * like 4.9, due to known bugs. Just skip it. 
+ */ +ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size) +{ + Elf64_Phdr *lp64; + unsigned long offset = -1UL; + ssize_t read_size; + unsigned int i; + + for (i = 0; i < pkd->segments; i++) { + lp64 = pkd->load64 + i; + if ((kvaddr >= lp64->p_vaddr) && + (kvaddr < (lp64->p_vaddr + lp64->p_memsz))) { + offset = (off_t)(kvaddr - lp64->p_vaddr) + + (off_t)lp64->p_offset; + break; + } + } + if (i == pkd->segments) { + for (i = 0; i < pkd->segments; i++) { + lp64 = pkd->load64 + i; + LOG_DEBUG("%2d: [0x%lx, 0x%lx)\n", i, lp64->p_vaddr, + lp64->p_vaddr + lp64->p_memsz); + } + //printf("invalid kvaddr 0x%lx\n", kvaddr); + goto failed; + } + + if (lseek(kcore_fd, offset, SEEK_SET) < 0) { + perror("lseek: /proc/kcore"); + goto failed; + } + + read_size = read(kcore_fd, buf, size); + if (read_size < size) { + perror("read: /proc/kcore"); + goto failed; + } + + return read_size; + +failed: + return -1; +} + +static void stripWhiteSpace(char *str) +{ + char tmp_str[strlen(str)]; + int i, j = 0; + + for (i = 0; str[i] != '\0'; i++) { + if (str[i] != ' ' && str[i] != '\t' + && str[i] != '\n') { + tmp_str[j] = str[i]; + j++; + } + } + + tmp_str[j] = '\0'; + strcpy(str, tmp_str); + + return; +} + +static int do_cmd(const char *cmd, char *result, int len) +{ + FILE *res; + char region[LEN] = {0}; + char *str; + + res = popen(cmd, "r"); + if (res == NULL) { + printf("get region id failed\n"); + return -1; + } + + if (feof(res)) { + printf("cmd line end\n"); + return 0; + } + fgets(region, sizeof(region)-1, res); + str = region; + stripWhiteSpace(str); + /* skip \n */ + strncpy(result, str, len - 1); + result[len - 1] = '\0'; + pclose(res); + return 0; +} + +static int download_btf() +{ + char region[LEN] = {0}; + char arch[LEN] = {0}; + char kernel[LEN] = {0}; + char dw[LEN+LEN] = {0}; + char timeout[LEN] = "-internal"; + char sysak_path[LEN] = "/boot"; + char *curl_cmd = "curl -s --connect-timeout 2 http://100.100.100.200/latest/meta-data/region-id 2>&1"; + char 
*arch_cmd = "uname -m"; + char *kernel_cmd = "uname -r"; + char *tmp; + + do_cmd(curl_cmd, region, LEN); + if (!strstr(region,"cn-")) { + strcpy(region, "cn-hangzhou"); + memset(timeout, 0, sizeof(timeout)); + } + + do_cmd(arch_cmd, arch, LEN); + + do_cmd(kernel_cmd, kernel, LEN); + + if((tmp = getenv("SYSAK_WORK_PATH")) != NULL) + { + memset(sysak_path, 0, sizeof(sysak_path)); + strcpy(sysak_path, tmp); + strcat(sysak_path, "/tools/"); + strcat(sysak_path, kernel); + } + + snprintf(dw, LEN + LEN + LEN, "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s", sysak_path, kernel, ®ion[3],®ion[3], timeout,arch, kernel); + + do_cmd(dw, kernel, LEN); + return 0; +} + +static int check_btf_file(char *btf) +{ + struct stat fstat; + int ret = 0; + + ret = stat(btf, &fstat); + if (ret) + return -1; + if (fstat.st_size < 10*1024) + return -1; + + return 0; +} + +char *prepare_btf_file() +{ + static char btf[LEN] = {0}; + char ver[LEN] = {0}; + char *cmd = "uname -r"; + + do_cmd(cmd, ver, LEN); + + if (getenv("SYSAK_WORK_PATH") != NULL) + sprintf(btf,"%s/tools/%s/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver, ver); + else + sprintf(btf,"/boot/vmlinux-%s", ver); + + if (check_btf_file(btf)) { + download_btf(); + }; + + if (check_btf_file(btf)) { + LOG_ERROR("btf file:%s not found \n", btf); + return NULL; + } + + return btf; +} \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/Makefile b/source/tools/detect/mem/memcgoffline/Makefile new file mode 100644 index 00000000..de3835b5 --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/Makefile @@ -0,0 +1,7 @@ +target := memcgoffline +LIBS += -L ${OBJ_LIB_PATH}/lib -l:libcoolbpf.a -l:libsysak.a -lelf -lz +INCLUDES += -I$(SRC)/tools/detect/mem/memcgoffline/include +LDFLAGS += -Wall $(LIBS) +mods := memcg_iter.o memcgoffline.o + +include $(SRC)/mk/csrc.mk \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/include/btfparse.h 
b/source/tools/detect/mem/memcgoffline/include/btfparse.h new file mode 100644 index 00000000..84204f54 --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/include/btfparse.h @@ -0,0 +1,39 @@ + + +#ifndef __BTF_PARSE_H +#define __BTF_PARSE_H + + + +/** + * btf_load: load btf from btf_custom_path + * + * @btf_custom_path: path of btf file + */ +struct btf *btf_load(char *btf_custom_path); +typedef unsigned int uint32_t; + +struct member_attribute +{ + uint32_t size; // size of structure's member + uint32_t real_size; // real_size mean + uint32_t offset; // offset of member in structure +}; + +/** + * btf_find_struct_member - find struct btfid by structure's name + * + * @btf: + * @struct_name: name of struct + * @member_name: name of structure's member + * @return: NULL means error, get error number from errno. + * + * Note: Remember to free pointer of struct member_attribute + */ +struct member_attribute *btf_find_struct_member(struct btf *btf, char *struct_name, char *member_name); + +int btf_get_member_offset(struct btf *btf, char *name, char *member_name); +void btf__free(struct btf *btf); + +#endif + diff --git a/source/tools/detect/mem/memcgoffline/include/memcg_iter.h b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h new file mode 100644 index 00000000..300a82b0 --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/include/memcg_iter.h @@ -0,0 +1,36 @@ +#ifndef __MEMCG_ITER_H_ +#define __MEMCG_ITER_H_ + +#include "btfparse.h" + +#define PATH_MAX (2048) +#define LEN (255) +#define CSS_DYING (1 << 4) /* css is dying */ + +/* iterator function of "for_each_mem_cgroup" */ +unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev, + struct btf* handle); + +/* find out and set root_mem_cgroup from kallsyms*/ +int memcg_iter_init(); + +/* Iter all memory cgroups, must call after memcg_iter_init() */ +#define for_each_mem_cgroup(iter, start, btf) \ + for (iter = _mem_cgroup_iter(start, (unsigned long)NULL, btf); \ + iter != (unsigned
long)NULL; \ + iter = _mem_cgroup_iter(start, iter, btf)) + +/* + * get member offset of certain struct, need to read from btf file, + * (don't call it in loop which may cause huge overhead) + */ +struct member_attribute *get_offset_no_cache(char *struct_name, + char *member_name, struct btf *handle); + +int get_member_offset(char *struct_name, char *member_name, + struct btf *handle); + +void memcg_get_name(unsigned long memcg, char *name, + int len, struct btf *btf_handle); + +#endif \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/memcg_iter.c b/source/tools/detect/mem/memcgoffline/memcg_iter.c new file mode 100644 index 00000000..541d0367 --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/memcg_iter.c @@ -0,0 +1,291 @@ +#include +#include +#include +#include +#include +#include + +#include "memcg_iter.h" +#include "kcore_utils.h" + +static unsigned long root_mem_cgroup; + +struct member_attribute *get_offset_no_cache(char *struct_name, + char *member_name, struct btf *handle) +{ + struct member_attribute *att; + + att = btf_find_struct_member(handle, struct_name, member_name); + if (!att) { + return NULL; + } + + att->offset = att->offset/8; + + return att; +} + +int get_member_offset(char *struct_name, char *member_name, struct btf *handle) +{ + char prefix[LEN] = "struct "; + + strcat(prefix, struct_name); + + return btf_get_member_offset(handle, prefix, member_name)/8; +} + +static unsigned long _css_next_child(unsigned long pos, unsigned long parent, + struct btf *btf_handle) +{ + struct member_attribute *att, *att2; + unsigned long next; + + att = get_offset_no_cache("cgroup_subsys_state", "sibling", btf_handle); + if (!att) + return 0; + + att2 = get_offset_no_cache("cgroup_subsys_state", "children", btf_handle); + if (!att2) + return 0; + + if(!pos) { + kcore_readmem(parent + att2->offset, &next, sizeof(next)); + next = next - att->offset; + } else { + kcore_readmem(pos + att->offset, &next, sizeof(next)); + next = next 
- att->offset; + } + + if(next + att->offset != parent + att2->offset) + return next; + + return 0; +} + +unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev, + struct btf *btf_handle) +{ + struct member_attribute *att, *att2; + unsigned long css, root_css; + unsigned long memcg = 0; + unsigned long pos = 0; + unsigned long next = 0; + unsigned long tmp1, tmp2; + + if(!root) + root = root_mem_cgroup; + if(!prev) + return root; + + //printf("root:%lx, prev:%lx\n", root, prev); + + att = get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!att) + return 0; + + att2 = get_offset_no_cache("cgroup_subsys_state", "parent", btf_handle); + if (!att2) + return 0; + + pos = prev; + //kcore_readmem(pos + att->offset, &css, sizeof(css)); + css = pos + att->offset; + + //kcore_readmem(root+att->offset, &root_css, sizeof(root_css)); + root_css = root + att->offset; + next = _css_next_child(0, css, btf_handle); + if(!next) + { + tmp1 = css; + while(tmp1 != root_css) + { + kcore_readmem(tmp1 + att2->offset, &tmp2, sizeof(tmp2)); + next = _css_next_child(tmp1, tmp2, btf_handle); + if(next) + break; + tmp1 = tmp2; + } + } + + if(!next) + return 0; + + memcg = next - att->offset; + + return memcg; +} + +int memcg_iter_init() +{ + unsigned long tmp; + size_t size; + + tmp = lookup_kernel_symbol("root_mem_cgroup"); + if (tmp == (unsigned long )-1) { + LOG_ERROR("unable to get root_mem_cgroup\n"); + return -1; + } else { + size = kcore_readmem(tmp, &root_mem_cgroup, 8); + if (size < 8) { + LOG_ERROR("get incorrect address where root_mem_cgroup point to\n"); + return -1; + } + } + + return 0; +} + +static int prepend(char **buffer, int *buflen, const char *str, + int namelen, int off) +{ + *buflen -= namelen + off; + if (*buflen < 0) + return -1; + *buffer -= namelen + off; + if (off) + **buffer = '/'; + memcpy(*buffer + off, str, namelen); + return 0; +} + +static int cgroup_path(unsigned long cgrp, char *buf, + int buflen, struct btf *btf_handle) +{ + int ret 
= -1; + char *start; + unsigned long cgp; + char tmpname[PATH_MAX]; + struct member_attribute *cg_pa_att, *cg_name_att; + struct member_attribute *cgn_name_attr; + + cg_pa_att = get_offset_no_cache("cgroup", "parent", btf_handle); + if (!cg_pa_att) + return -1; + + cg_name_att = get_offset_no_cache("cgroup", "name", btf_handle); + if (!cg_name_att) + return -1; + + cgn_name_attr = get_offset_no_cache("cgroup_name", "name", btf_handle); + if (!cgn_name_attr) + return -1; + + + kcore_readmem(cgrp + cg_pa_att->offset, &cgp, sizeof(cgp)); + if (!cgp) { + if (strncpy(buf, "/", buflen) == NULL) + return -1; + return 0; + } + + start = buf + buflen - 1; + *start = '\0'; + + do { + int len; + unsigned long name; + + kcore_readmem(cgrp + cg_name_att->offset, &name, sizeof(name)); + + name += cgn_name_attr->offset; + kcore_readmem(name, tmpname,sizeof(tmpname)); + + len = strlen(tmpname); + if ((start -= len) < buf) + goto out; + + memcpy(start, tmpname, len); + + if (--start < buf) + goto out; + + *start = '/'; + cgrp = cgp; + + kcore_readmem(cgp + cg_pa_att->offset, &cgp, sizeof(cgp)); + + } while (cgp); + + ret = 0; + memmove(buf, start, buf + buflen - start); +out: + return ret; +} + +void memcg_get_name(unsigned long memcg, char *name, + int len, struct btf *btf_handle) +{ + char *end; + int pos; + unsigned long cg, knname; + char subname[257]; + struct member_attribute *att; + + memset(subname, 0, sizeof(subname)); + att = get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!att) + return; + + cg = memcg + att->offset; + + att = get_offset_no_cache("cgroup_subsys_state", "cgroup", btf_handle); + if (!att) + return; + + kcore_readmem(cg + att->offset, &cg, sizeof(cg)); + +#ifdef LINUX_310 + if (!cg) + return; + cgroup_path(cg, name, PATH_MAX, btf_handle); /* cgroup_path() takes the BTF handle as its 4th argument to resolve struct offsets */ + end = name+strlen("/sys/fs/cgroup/memory/"); + memmove(end, name, strlen(name)+1); + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); +#else + unsigned long kn; + unsigned long pkn; + int
kn_name_offset, kn_pa_offset; + + att = get_offset_no_cache("cgroup", "kn", btf_handle); + if (!att) + return; + + kcore_readmem(cg + att->offset, &kn, sizeof(kn)); + + if (!cg || !kn) + return; + + end = name + len - 1; + prepend(&end, &len, "\0", 1, 0); + pkn = kn; + + kn_name_offset = get_member_offset("kernfs_node", "name", btf_handle); + if (kn_name_offset < 0) + return; + + kn_pa_offset = get_member_offset("kernfs_node", "parent", btf_handle); + if (kn_pa_offset < 0) + return; + + while (pkn) { + kcore_readmem(pkn + kn_name_offset, &knname, sizeof(knname)); + kcore_readmem(knname, subname, sizeof(subname)); + + pos = prepend(&end, &len, subname, strlen(subname), 0); + if (pos) + break; + + kcore_readmem(pkn + kn_pa_offset, &kn, sizeof(kn)); + if ((pkn == kn) || !kn) + break; + pos = prepend(&end, &len, "/", 1, 0); + if (pos) + break; + pkn = kn; + } + + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); + + memmove(name, end, strlen(end) + 1); +#endif +} \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/memcgoffline.c b/source/tools/detect/mem/memcgoffline/memcgoffline.c new file mode 100644 index 00000000..16fe17db --- /dev/null +++ b/source/tools/detect/mem/memcgoffline/memcgoffline.c @@ -0,0 +1,187 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kcore_utils.h" +#include "memcg_iter.h" + +static struct btf *btf_handle = NULL; +int total_memcg_num = 0; + +struct environment { + int print_cg_num; /* unused */ +} env = { + .print_cg_num = 10000, +}; + +static int caculate_offline(unsigned long start_memcg) +{ + int offline_num = 0; + unsigned long css, css_flags, cnt, iter = 0; + long refcnt_value; + unsigned int flags_value; + char fileName[PATH_MAX]; + struct member_attribute *css_attr, *css_flag_attr, *refcnt_attr; + struct member_attribute *cnt_attr; + + css_attr = get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!css_attr) { + 
LOG_ERROR("get css offset of mem_cgroup failed!\n"); + return -1; + } + + css_flag_attr = get_offset_no_cache("cgroup_subsys_state", + "flags", btf_handle); + if (!css_flag_attr) { + LOG_ERROR("get flags offset of cgroup_subsys_state failed!\n"); + return -1; + } + + refcnt_attr = get_offset_no_cache("cgroup_subsys_state", + "refcnt", btf_handle); + if (!refcnt_attr) { + LOG_ERROR("get refcnt offset of cgroup_subsys_state failed!\n"); + return -1; + } + + cnt_attr = get_offset_no_cache("percpu_ref", "count", btf_handle); + if (!cnt_attr) { + LOG_ERROR("get cnt offset of percpu_ref failed!\n"); + return -1; + } + + for_each_mem_cgroup(iter, start_memcg, btf_handle) { + css = iter + css_attr->offset; + css_flags = css + css_flag_attr->offset; + + kcore_readmem(css_flags, &flags_value, sizeof(flags_value)); + + if (flags_value & CSS_DYING) { + cnt = css + refcnt_attr->offset + cnt_attr->offset; + + offline_num++; + kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)); + + if (env.print_cg_num > 0) { + memcg_get_name(iter, fileName, PATH_MAX, btf_handle); + printf("cgroup path:%s\trefcount=%ld\n", fileName, refcnt_value); + env.print_cg_num--; + } + } + total_memcg_num++; + } + + return offline_num; +} + +static void show_usage(char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -n PRINT_MAX_CG_NUM Max offline memcg paths to printf(default 10000)\n" + " -h HELP help\n" + " \n" + + " EXAMPLE:\n " + " memcgoffline # display number of offline memcg and all their paths.\n" + " memcgoffline -n 10 # display number of offline memcg and " + "10 of offline memcg paths.\n" + ; + + fprintf(stderr, str, prog); + exit(EXIT_FAILURE); +} + +static int parse_args(int argc, char **argv, struct environment *env) +{ + int c, option_index; + char *prog_name = "memcgoffline"; + + for (;;) { + c = getopt_long(argc, argv, "n:h", NULL, &option_index); + if (c == -1) + break; + + switch (c) { + case 'n': + env->print_cg_num = (int)strtol(optarg, NULL, 10); + if 
(!errno) + return -errno; + break; + case 'h': + show_usage(prog_name); /* would exit */ + break; + default: + show_usage(prog_name); + } + } + + return 0; +} + +struct btf *btf_init() +{ + char *btf_path; + + btf_path = prepare_btf_file(); + if (!btf_path) + return NULL; + + return btf_load(btf_path); +} + +void btf_uninit(struct btf *btf) +{ + return btf__free(btf); +} + +int main(int argc, char *argp[]) +{ + int offline_memcg = 0, ret = 0; + + ret = parse_args(argc, argp, &env); + if (ret) { + LOG_ERROR("parse arg error!\n"); + return -1; + } + + btf_handle = btf_init(); + if (!btf_handle) { + LOG_ERROR("btf init failed!\n"); + return -1; + } + + ret = kcore_init(); + if (ret) { + LOG_ERROR("kcore init failed!\n"); + goto uninit_btf; + } + + ret = memcg_iter_init(); + if (ret) { + LOG_ERROR("memcg_iter_init failed!\n"); + goto uninit_kcore; + } + + offline_memcg = caculate_offline((unsigned long)NULL); + if (offline_memcg < 0) { + LOG_ERROR("caculate offline memcg failed!\n"); + ret = offline_memcg; + goto uninit_kcore; + } + printf("Offline memory cgroup num: %d\n", offline_memcg); + printf("Total memory cgroup num: %d\n", total_memcg_num); + +uninit_kcore: + kcore_uninit(); +uninit_btf: + btf_uninit(btf_handle); + + return ret; +} -- Gitee From 9e83af508eed22c08b878cc9dbb769006633e1e7 Mon Sep 17 00:00:00 2001 From: jietaoxiao Date: Wed, 19 Jul 2023 11:16:52 +0800 Subject: [PATCH 13/16] tools/detect/mem/memcgoffline: fix make error --- source/mk/csrc.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/mk/csrc.mk b/source/mk/csrc.mk index 8701dc77..e3fdf57c 100644 --- a/source/mk/csrc.mk +++ b/source/mk/csrc.mk @@ -1,6 +1,6 @@ objs := $(foreach n, $(mods), $(OBJPATH)/$(n)) -CFLAGS += $(EXTRA_CFLAGS) -I$(SRC)/lib/uapi/include +CFLAGS += $(EXTRA_CFLAGS) $(INCLUDES) -I$(SRC)/lib/uapi/include LDFLAGS += $(EXTRA_LDFLAGS) ifeq ($(KERNEL_DEPEND), Y) -- Gitee From 0a83d759a7fb9a0b2fc9c03fde3ac18251260f67 Mon Sep 17 00:00:00 2001 From: "muya.zj" 
Date: Wed, 19 Jul 2023 10:33:50 +0800 Subject: [PATCH 14/16] sysak.service: change MemoryLimit Signed-off-by: muya.zj --- rpm/sysak.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpm/sysak.service b/rpm/sysak.service index 4379ea4c..447ecd20 100644 --- a/rpm/sysak.service +++ b/rpm/sysak.service @@ -7,7 +7,7 @@ Restart=always RestartSec=10 CPUQuota=30% - MemoryLimit=60M + MemoryLimit=300M ExecStart=/usr/local/sysak/.sysak_components/tools/dist/app/beeQ/run.sh ExecStop=kill -9 $(pidof unity-mon) ExecReload=kill -1 $(pidof unity-mon) -- Gitee From e5345e5d0595afd305a97e8a0d761c14b8593fea Mon Sep 17 00:00:00 2001 From: = Date: Tue, 11 Jul 2023 16:33:11 +0800 Subject: [PATCH 15/16] unity: imc_latency rename macro --- .../unity/collector/plugin/imc_latency/imc_latency.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h index f42a0752..e3cc818b 100644 --- a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h +++ b/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h @@ -1,5 +1,5 @@ -#ifndef UNITY_SAMPLE_H -#define UNITY_SAMPLE_H +#ifndef UNITY_IMC_LATENCY_H +#define UNITY_IMC_LATENCY_H #include #include @@ -123,4 +123,4 @@ struct topology_ent { int64_t socket_id; }; -#endif // UNITY_SAMPLE_H +#endif -- Gitee From 697c1b8fa57a200d1c4098222eeeebb605454523 Mon Sep 17 00:00:00 2001 From: liaozhaoyan Date: Wed, 19 Jul 2023 23:25:11 +0800 Subject: [PATCH 16/16] change metrics description title, use sysom_ instead. 
--- source/tools/monitor/unity/etc/base.yaml | 70 +++++++++++------------ source/tools/monitor/unity/etc/group.yaml | 16 +++--- source/tools/monitor/unity/etc/k8s.yaml | 18 +++--- 3 files changed, 52 insertions(+), 52 deletions(-) diff --git a/source/tools/monitor/unity/etc/base.yaml b/source/tools/monitor/unity/etc/base.yaml index c784e642..9350c344 100644 --- a/source/tools/monitor/unity/etc/base.yaml +++ b/source/tools/monitor/unity/etc/base.yaml @@ -49,92 +49,92 @@ plugins: metrics: - - title: sysak_proc_cpu_total + title: sysom_proc_cpu_total from: cpu_total head: mode help: "cpu usage info for total." type: "gauge" - - title: sysak_proc_cpus + - title: sysom_proc_cpus from: cpus head: mode help: "cpu usage info for per-cpu." type: "gauge" - - title: sysak_proc_sirq + - title: sysom_proc_sirq from: sirq head: type help: "system soft irq times." type: "gauge" - - title: sysak_proc_stat_counters + - title: sysom_proc_stat_counters from: stat_counters head: counter help: "system state counter." type: "gauge" - - title: sysak_proc_meminfo + - title: sysom_proc_meminfo from: meminfo head: value help: "meminfo from /proc/meminfo." type: "gauge" - - title: sysak_proc_vmstat + - title: sysom_proc_vmstat from: vmstat head: value help: "vmstat info from /proc/vmstat." type: "gauge" - - title: sysak_proc_self_statm + - title: sysom_proc_self_statm from: self_statm head: value help: "statm info from /proc/self/statm." type: "gauge" - - title: sysak_proc_networks + - title: sysom_proc_networks from: networks head: counter help: "networks info from /proc/net/dev." type: "gauge" - - title: sysak_proc_disks + - title: sysom_proc_disks from: disks head: counter help: "disk info from /proc/diskstats." type: "gauge" - - title: sysak_proc_pkt_status + - title: sysom_proc_pkt_status from: pkt_status head: counter help: "net status info from /proc/net/snmp and /proc/net/status." 
type: "gauge" - - title: sysak_fs_stat + - title: sysom_fs_stat from: fs_stat head: counter help: "file system information." type: "gauge" - - title: sysak_sock_stat + - title: sysom_sock_stat from: sock_stat head: value help: "sock stat counters from /proc/net/sockstat" type: "gauge" - - title: sysak_proc_schedstat + - title: sysom_proc_schedstat from: proc_schedstat head: value help: "schedule state of percpu." type: "gauge" - - title: sysak_proc_loadavg + - title: sysom_proc_loadavg from: proc_loadavg head: value help: "loadavg of system from /proc/loadavg" type: "gauge" - - title: sysak_proc_buddyinfo + - title: sysom_proc_buddyinfo from: buddyinfo head: value help: "buddyinfo of system from /proc/buddyinfo" type: "gauge" - - title: sysak_IOMonIndForDisksIO + - title: sysom_IOMonIndForDisksIO from: IOMonIndForDisksIO head: value help: "Disk IO indicators and abnormal events" type: "gauge" - - title: sysak_IOMonIndForSystemIO + - title: sysom_IOMonIndForSystemIO from: IOMonIndForSystemIO head: value help: "System indicators and abnormal events about IO" type: "gauge" - - title: sysak_IOMonDiagLog + - title: sysom_IOMonDiagLog from: IOMonDiagLog head: value help: "Diagnose log for IO exception" @@ -144,87 +144,87 @@ metrics: head: value help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling" type: "gauge" - - title: sysak_cpu_dist + - title: sysom_cpu_dist from: cpu_dist head: value help: "task cpu sched dist." 
type: "gauge" - - title: sysak_net_health_hist + - title: sysom_net_health_hist from: net_health_hist head: value help: "net_health_hist" type: "gauge" - - title: sysak_net_health_count + - title: sysom_net_health_count from: net_health_count head: value help: "net_health_count" type: "gauge" - - title: sysak_net_retrans_count + - title: sysom_net_retrans_count from: net_retrans_count head: value help: "net_retrans_count" type: "gauge" - - title: sysak_gpuinfo + - title: sysom_gpuinfo from: gpuinfo head: value help: "gpuinfo of system from nvidia-smi" type: "gauge" - - title: sysak_uname + - title: sysom_uname from: uname head: value help: "uname info" type: "gauge" - - title: sysak_uptime + - title: sysom_uptime from: uptime head: value help: "uptime from /proc/uptime" type: "gauge" - - title: sysak_system_release + - title: sysom_system_release from: system_release head: value help: "system_release from /etc/os-release" type: "gauge" - - title: sysak_cgroups + - title: sysom_cgroups from: cgroups head: value help: "cgroup number." type: "gauge" - - title: sysak_per_sirqs + - title: sysom_per_sirqs from: per_sirqs head: value help: "per_sirqs." type: "gauge" - - title: sysak_softnets + - title: sysom_softnets from: softnets head: value help: "cgroup number." type: "gauge" - - title: sysak_interrupts + - title: sysom_interrupts from: interrupts head: value help: "interrupts." 
type: "gauge" - - title: sysak_net_ip_count + - title: sysom_net_ip_count from: net_ip_count head: value help: "net snmp net_ip_count" type: "gauge" - - title: sysak_net_icmp_count + - title: sysom_net_icmp_count from: net_icmp_count head: value help: "net snmp net_icmp_count" type: "gauge" - - title: sysak_net_udp_count + - title: sysom_net_udp_count from: net_udp_count head: value help: "net snmp net_udp_count" type: "gauge" - - title: sysak_net_tcp_count + - title: sysom_net_tcp_count from: net_tcp_count head: value help: "net snmp net_tcp_count" type: "gauge" - - title: sysak_net_tcp_ext_count + - title: sysom_net_tcp_ext_count from: net_tcp_ext_count head: value help: "net stat net_tcp_ext_count" diff --git a/source/tools/monitor/unity/etc/group.yaml b/source/tools/monitor/unity/etc/group.yaml index 3208ecc2..6e3d8302 100644 --- a/source/tools/monitor/unity/etc/group.yaml +++ b/source/tools/monitor/unity/etc/group.yaml @@ -62,33 +62,33 @@ plugins: description: "summary retrans out put." metrics: - - title: sysak_proc_pkt_status + - title: sysom_proc_pkt_status from: pkt_status head: counter help: "net status info from /proc/net/snmp and /proc/net/status." 
type: "gauge" - - title: sysak_net_health_hist + - title: sysom_net_health_hist from: net_health_hist head: value help: "net_health_hist" type: "gauge" - - title: sysak_net_health_count + - title: sysom_net_health_count from: net_health_count head: value help: "net_health_count" type: "gauge" - - title: sysak_net_retrans_count + - title: sysom_net_retrans_count from: net_retrans_count head: value help: "net_retrans_count" type: "gauge" - - title: sysak_virtout_dist + - title: sysom_virtout_dist from: virtout_dist head: value - help: "sysak_virtout_dist" + help: "sysom_virtout_dist" type: "gauge" - - title: sysak_retrans + - title: sysom_retrans from: retrans head: value - help: "sysak_retrans" + help: "sysom_retrans" type: "gauge" \ No newline at end of file diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml index c4f63b2e..bddd9864 100644 --- a/source/tools/monitor/unity/etc/k8s.yaml +++ b/source/tools/monitor/unity/etc/k8s.yaml @@ -281,7 +281,7 @@ metrics: head: value help: "sysom_cg_memory_util" type: "gauge" - - title: sysak_cg_mem_glob_drcm_latency + - title: sysom_cg_mem_glob_drcm_latency from: cgGlbDrcmLatency head: value help: "sysom global memory latency" @@ -296,22 +296,22 @@ metrics: head: value help: "sysom_cg_memmcmp_latency" type: "gauge" - - title: sysak_cg_mem_dcmp_latency + - title: sysom_cg_mem_dcmp_latency from: cg_memdcmp_latency head: value - help: "sysak_cg_mem_dcmp_latency" + help: "sysom_cg_mem_dcmp_latency" type: "gauge" - - title: sysak_cg_cpuacct_wait_latency + - title: sysom_cg_cpuacct_wait_latency from: cg_wait_latency head: value - help: "sysak_cg_cpuacct_wait_latency" + help: "sysom_cg_cpuacct_wait_latency" type: "gauge" - - title: sysak_cg_cpuacct_proc_stat + - title: sysom_cg_cpuacct_proc_stat from: cg_cpuacct_proc_stat head: value - help: "sysak_cg_cpuacct_proc_stat" + help: "sysom_cg_cpuacct_proc_stat" type: "gauge" - - title: sysak_cg_cpu_quota + - title: sysom_cg_cpu_quota from: 
cgCpuQuota head: value help: "quota_us,peroid_us and quota/period" @@ -326,7 +326,7 @@ metrics: head: value help: "cpuacct/cpuacct.stat" type: "gauge" - - title: sysak_cg_pmu_events + - title: sysom_cg_pmu_events from: pmu_cg_events head: value help: "pmu events of cgroups" -- Gitee