diff --git a/README.en.md b/README.en.md index 234f2cc0f154fa11903ef296e3c6f9f15bae4855..8ea06f35b15d7d0916b29ca5fef882fb93f3bcaa 100644 --- a/README.en.md +++ b/README.en.md @@ -127,82 +127,106 @@ Here are some examples: * Get pmu count for a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -int pd = PmuOpen(COUNTING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - ... +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen and pmu descriptor is return. + // is an identity for current task. + int pd = PmuOpen(COUNTING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << "count=" << d->count << std::endl; + } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. 
+ PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Sample a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -// Use SAMPLING for sample task. -int pd = PmuOpen(SAMPLING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // Get an element from array. - PmuData *d = &data[i]; - // Get stack object which is a linked list. - Stack *stack = d->stack; - while (stack) { - // Get symbol object. - if (stack->symbol) { - ... +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen and pmu descriptor is return. + // is an identity for current task. + // Use SAMPLING for sample task. + int pd = PmuOpen(SAMPLING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // Get an element from array. 
+ PmuData *d = &data[i]; + // Get stack object which is a linked list. + Stack *stack = d->stack; + while (stack) { + // Get symbol object. + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + } + stack = stack->next; } - stack = stack->next; } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. + PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Python examples diff --git a/README.md b/README.md index 555c062d73d10d84407df11ba1493d38f3a03b23..5b4bfea1f44ce251deb7552d1b28773a3086eada 100644 --- a/README.md +++ b/README.md @@ -107,77 +107,101 @@ Go API文档可以参考GO_API.md: - 获取进程的pmu计数 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(COUNTING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - PmuData *d = &data[i]; - ... 
+#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pidList[1]; + pidList[0] = getpid(); + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(COUNTING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << "count=" << d->count << std::endl; + } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - 对进程进行采样 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(SAMPLING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // 获取数组的一个元素。 - PmuData *d = &data[i]; - // 获取调用栈对象,它是一个链表。 - Stack *stack = d->stack; - while (stack) { - // 获取符号对象。 - if (stack->symbol) { - ... 
+#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; + attr.callStack = 1; + attr.freq = 200; + attr.useFreq = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(SAMPLING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // 获取数组的一个元素。 + PmuData *d = &data[i]; + // 获取调用栈对象,它是一个链表。 + Stack *stack = d->stack; + while (stack) { + // 获取符号对象。 + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + + } + stack = stack->next; } - stack = stack->next; } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - Python 例子 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index fe89cc6beb4873e2d7c416197ec9bed3c17951e1..6c1d75e286a42ebd9eef42c21a1c5a4495f6834a 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -25,6 +25,7 @@ int pd = PmuOpen(COUNTING, &attr); # python代码示例 import time import kperf + evtList = ["cycles", "branch-misses"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -134,15 +135,38 @@ perf record -e cycles,branch-misses 设置PmuAttr的方式和Counting一样,在调用PmuOpen的时候,把任务类型设置为SAMPLING,并且设置采样频率: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include 
"pcerrc.h" + +PmuAttr attr = {0}; +char* evtList[1] = {"cycles"}; attr.freq = 1000; // 采样频率是1000HZ attr.useFreq = 1; +attr.evtList = evtList; +attr.numEvt = 1; int pd = PmuOpen(SAMPLING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("cpu=%d pid=%d tid=%d period=%ld\n", data[i].cpu, data[i].pid, data[i].tid, data[i].period); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym import time + evtList = ["branch-misses", "cycles"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -150,6 +174,16 @@ pmu_attr = kperf.PmuAttr( symbolMode=kperf.SymbolMode.RESOLVE_ELF ) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if pd == -1: + print(f"kperf pmuopen sample failed, expect err is nil, but is {kperf.error()}\n") +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"cpu {item.cpu} pid {item.pid} tid {item.tid} period {item.period}") +kperf.close(pd) ``` ```go @@ -165,6 +199,18 @@ func main() { fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("cpu=%d pid=%d tid=%d period=%v\n", o.Cpu, o.Pid, o.Tid, o.Period) + } + kperf.PmuClose(pd) } ``` @@ -197,23 +243,55 @@ perf record -e arm_spe_0/load_filter=1/ 对于libkperf,可以这样设置PmuAttr: ```c++ // c++代码示例 +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + PmuAttr attr = {0}; attr.period = 8192; // 采样周期是8192 attr.dataFilter = LOAD_FILTER; // 设置filter属性为load_filter + +int pd = PmuOpen(SPE_SAMPLING, &attr); +if ( pd == -1) { + 
printf("kperf pmuopen spe failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto o = data[i]; + printf("spe base info comm=%s, pid=%d, tid=%d, coreId=%d, numaId=%d, socketId=%d\n", o.comm, o.pid, o.tid, o.cpuTopo->coreId, o.cpuTopo->numaId, o.cpuTopo->socketId); + printf("spe ext info pa=%lu, va=%lu, event=%lu, latency=%lu\n", o.ext->pa, o.ext->va, o.ext->event, o.ext->lat); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym +import time + pmu_attr = kperf.PmuAttr( - sampleRate = 1000, - symbolMode = kperf.SymbolMode.RESOLVE_ELF, - dataFilter = kperf.SpeFilter.SPE_DATA_ALL, - evFilter = kperf.SpeEventFilter.SPE_EVENT_RETIRED, - minLatency = 0x40 + sampleRate = 8192, + dataFilter = kperf.SpeFilter.LOAD_FILTER, ) # 需要root权限才能运行 pd = kperf.open(kperf.PmuTaskType.SPE_SAMPLING, pmu_attr) + +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"spe base info comm={item.comm}, pid={item.pid}, tid={item.tid}, coreId={item.cpuTopo.coreId}, numaId={item.cpuTopo.numaId}, socketId={item.cpuTopo.socketId}") + print(f"spe ext info pa={item.ext.pa}, va={item.ext.va}, event={item.ext.event}, latency={item.ext.lat}\n") +kperf.close(pd) ``` ```go @@ -222,12 +300,28 @@ import "libkperf/kperf" import "time" func main() { - attr := kperf.PmuAttr{MinLatency:0x40, SymbolMode: kperf.ELF, SampleRate: 1000, DataFilter: kperf.SPE_DATA_ALL, EvFilter: kperf.SPE_EVENT_RETIRED} + attr := kperf.PmuAttr{SampleRate:8192, DataFilter: kperf.LOAD_FILTER} pd, err := kperf.PmuOpen(kperf.SPE, attr) if err != nil { fmt.Printf("kperf pmuopen spe failed, expect err is nil, but is %v\n", err) return } + + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, 
but is %v\n", err) + } + + for _, o := range dataVo.GoData { + fmt.Printf("spe base info comm=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, socketId=%v\n", o.Comm, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) + fmt.Printf("spe ext info pa=%v, va=%v, event=%v, latency=%v\n", o.SpeExt.Pa, o.SpeExt.Va, o.SpeExt.Event, o.SpeExt.Lat) + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(pd) } ``` @@ -311,13 +405,35 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("evt=%s, count=%d\n", data[i].evt, data[i].count); +} +PmuClose(pd); ``` + ```python # python代码示例 import kperf +import time + evtList = ["hisi_sccl1_ddrc0/flux_rd/"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"evt={item.evt} count={item.count}") +kperf.close(pd) ``` ```go @@ -334,6 +450,18 @@ func main() { fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("evt=%v count=%v \n", o.Evt, o.Count) + } + kperf.PmuClose(pd) } ``` @@ -350,7 +478,7 @@ evtList[0] = "hisi_sccl1_ddrc/flux_rd/"; evtList = ["hisi_sccl1_ddrc/flux_rd/"] ``` -```go +```go // go代码示例 evtList := []string{"hisi_sccl1_ddrc/flux_rd/"} ``` @@ -380,16 +508,27 @@ libkperf支持tracepoint的采集,支持的tracepoint事件可以通过perf li 可以这样设置PmuAttr: ```c++ // c++代码示例 +#include +#include "symbol.h" 
+#include "pmu.h" +#include "pcerrc.h" + char *evtList[1]; evtList[0] = "sched:sched_switch"; PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; +attr.period = 1000; int pd = PmuOpen(SAMPLING, &attr); ``` ```python # python代码示例 +import kperf +import ksym +import time +from ctypes import * + evtList = ["sched:sched_switch"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -404,7 +543,6 @@ pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) import "libkperf/kperf" import "fmt" - func main() { evtList := []string{"sched:sched_switch"} attr := kperf.PmuAttr{EvtList:evtList, SymbolMode:kperf.ELF, SampleRate: 1000} @@ -424,10 +562,24 @@ tracepoint能够获取每个事件特有的数据,比如sched:sched_switch包 libkperf提供了接口PmuGetField来获取tracepoint的数据。比如对于sched:sched_switch,可以这样调用: ```c++ // c++代码示例 -int prev_pid; -PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); -char next_comm[16]; -PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto pmuData = &data[i]; + int prev_pid; + PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); + char next_comm[16]; + PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); + printf("next_comm=%s;prev_pid=%d\n", next_comm, prev_pid); +} ``` ```python @@ -480,10 +632,10 @@ func main() { var cArray [15]C.char nextErr := v.GetField("next_comm", unsafe.Pointer(&cArray)) if nextErr != nil { - fmt.Printf("get next_comm failed err is%v ",nextErr) + fmt.Printf("get next_comm failed err is%v\n",nextErr) } else { ptr := (*C.char)(unsafe.Pointer(&cArray[0])) - fmt.Printf("next_comm=%v;", C.GoString(ptr)) + fmt.Printf("next_comm=%v\n", C.GoString(ptr)) } prevPid := C.int(0) @@ -509,6 +661,11 @@ perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired 
比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + unsigned numEvt = 5; char *evtList[numEvt] = {"cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"}; // 前四个事件是一个分组 @@ -517,12 +674,27 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = numEvt; attr.evtAttr = groupId; + +int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("cpu=%d, count=%d evt=%s\n", data[i].cpu, data[i].count, data[i].evt); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf import time + evtList = ["cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"] # 前四个事件是一个分组 evtAttrList = [1,1,1,1,-1] @@ -535,6 +707,7 @@ pmu_data = kperf.read(pd) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) for data in pmu_data.iter: print(f"cpu {data.cpu} count {data.count} evt {data.evt}") +kperf.close(pd) ``` ```go @@ -605,6 +778,10 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) 参考代码: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[2]; // DDR读带宽 devAttr[0].metric = PMU_DDR_READ_BW; @@ -627,10 +804,10 @@ for (int i = 0; i < len / 2; ++i) { // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } for (int i = len / 2; i < len; ++i) { - cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << 
devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -639,6 +816,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) @@ -658,6 +838,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_READ_BW}, kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_WRITE_BW}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -705,6 +889,10 @@ libkperf提供了采集L3 cache平均时延的能力,用于分析访存型应 参考代码: ```c++ +#include +#include "symbol.h" +#include "pmu.h" + // c++代码示例 PmuDeviceAttr devAttr[1]; // L3平均时延 @@ -721,7 +909,7 @@ auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); // devData的长度等于cluster个数 for (int i=0;i +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[1]; // 采集PCIE设备RX的读带宽 devAttr[0].metric = PMU_PCIE_RX_MRD_BW; @@ -802,6 +1001,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_PCIE_RX_MRD_BW, bdf="16:04.0") ] @@ -817,6 +1019,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_PCIE_RX_MRD_BW, Bdf: "16:04.0"}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -837,6 +1043,102 @@ kperf.PmuClose(fd) pcie bw(16:04.0): 124122412 Bytes/ns ``` +### 采集跨numa/跨socket访问HHA比例 +libkperf提供了采集跨numa/跨socket访问HHA的操作比例的能力,用于分析访存型应用的性能瓶颈,采集以numa为粒度。 + +参考代码: 
+```c++ +// c++代码示例 +#include <iostream> +#include "symbol.h" +#include "pmu.h" + +PmuDeviceAttr devAttr[2]; +// 采集跨numa访问HHA的操作比例 +devAttr[0].metric = PMU_HHA_CROSS_NUMA; +// 采集跨socket访问HHA的操作比例 +devAttr[1].metric = PMU_HHA_CROSS_SOCKET; +// 初始化采集任务 +int pd = PmuDeviceOpen(devAttr, 2); +// 开始采集 +PmuEnable(pd); +sleep(1); +PmuData *oriData = nullptr; +int oriLen = PmuRead(pd, &oriData); +PmuDeviceData *devData = nullptr; +auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); +// devData的长度等于设备numa的个数 +for (int i = 0; i < len / 2; ++i) { + std::cout << "HHA cross-numa operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count << "\n"; +} +for (int i = len / 2; i < len; ++i) { + std::cout << "HHA cross-socket operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count << "\n"; +} +DevDataFree(devData); +PmuDataFree(oriData); +PmuDisable(pd); +``` + +```python +# python代码示例 +import kperf +import time + +dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET) +] +pd = kperf.device_open(dev_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +ori_data = kperf.read(pd) +dev_data = kperf.get_device_metric(ori_data, dev_attr) +for data in dev_data.iter: + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA: + print(f"HHA cross-numa operations ratio (Numa: {data.numaId}): {data.count}") + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET: + print(f"HHA cross-socket operations ratio (Numa: {data.numaId}): {data.count}") +``` + +```go +// go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + +deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_NUMA}, kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_SOCKET}} +fd, _ := kperf.PmuDeviceOpen(deviceAttrs) +kperf.PmuEnable(fd) +time.Sleep(1 * time.Second) +kperf.PmuDisable(fd) +dataVo, _ := kperf.PmuRead(fd) +deivceDataVo, _ := 
kperf.PmuGetDevMetric(dataVo, deviceAttrs) +for _, v := range deivceDataVo.GoDeviceData { + if v.Metric == kperf.PMU_HHA_CROSS_NUMA { + fmt.Printf("HHA cross-numa operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } + if v.Metric == kperf.PMU_HHA_CROSS_SOCKET { + fmt.Printf("HHA cross-socket operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } +} +kperf.DevDataFree(deivceDataVo) +kperf.PmuDataFree(dataVo) +kperf.PmuClose(fd) +``` + +执行上述代码,输出的结果类似如下: +``` +HHA cross-numa operations ratio (Numa: 0): 0.438888 +HHA cross-numa operations ratio (Numa: 1): 0.0248052 +HHA cross-numa operations ratio (Numa: 2): 0.0277224 +HHA cross-numa operations ratio (Numa: 3): 0.181404 +HHA cross-socket operations ratio (Numa: 0): 0.999437 +HHA cross-socket operations ratio (Numa: 1): 0.0253748 +HHA cross-socket operations ratio (Numa: 2): 0.329864 +HHA cross-socket operations ratio (Numa: 3): 0.18956 +``` + ### 采集系统调用函数耗时信息 libkperf基于tracepoint事件采集能力,在原有能力的基础上,重新封装了一组相关的调用API,来提供采集系统调用函数耗时信息的能力,类似于perf trace命令 @@ -848,6 +1150,10 @@ perf trace -e read,write 比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + unsigned numFunc = 2; const char *funs1 = "read"; const char *funs2 = "write"; @@ -862,7 +1168,7 @@ PmuTraceDisable(pd); PmuTraceData *data = nullptr; int len = PmuTraceRead(pd, &data); for(int i = 0; i < len; ++i) { - printf("funcName: %s, elspsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm) + printf("funcName: %s, elapsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm); } PmuTraceClose(pd); ``` @@ -871,6 +1177,7 @@ PmuTraceClose(pd); # python代码示例 import kperf import time + funcList = ["read","write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcList) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -930,6 +1237,10 @@ funcName: write 
elapsedTime: 0.00118 ms pid: 997235 tid: 997235 cpu: 110 comm: t ### 采集BRBE数据 libkperf基于sampling的能力,增加了对branch sample stack数据的采集能力,用于获取CPU的跳转记录, 通过branchSampleFilter可指定获取不同类型的分支跳转记录。 ```c++ +#include +#include "symbol.h" +#include "pmu.h" + char* evtList[1] = {"cycles"}; int* cpuList = nullptr; PmuAttr attr = {0}; @@ -980,6 +1291,7 @@ ffff88f60aa0->ffff88f60618 1 ```python import time +import ksym import kperf evtList = ["cycles"] diff --git a/docs/Go_API.md b/docs/Go_API.md index 71991427844c6267e9ad5ad4ae1554cdbbdfa787..33d7a7cfaa71eba13f8bff14b7599319086aa6d5 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -300,7 +300,7 @@ import "fmt" func main() { syscallList := kperf.PmuSysCallFuncList() if syscallList == nil { - fmt.Printf("sys call list is empty") + fmt.Printf("sys call list is empty\n") } else { for _, funcName := range syscallList { fmt.Printf("func name %v\n", funcName) @@ -346,6 +346,8 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * Bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int和error,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 @@ -438,7 +440,8 @@ func main() { ``` -### kperf.PmuGetCpuFreq +### kperf.PmuGetCpuFreq + func PmuGetCpuFreq(core uint) (int64, error) 查询当前系统指定core的实时CPU频率 * core cpu coreId @@ -457,4 +460,80 @@ func main() { } fmt.Printf("coreId %v freq is %v\n", coreId, freq) } +``` + +### kperf.PmuOpenCpuFreqSampling + +func PmuOpenCpuFreqSampling(period uint) (error) 开启cpu频率采集 + +### kperf.PmuCloseCpuFreqSampling + +func PmuCloseCpuFreqSampling() 关闭cpu频率采集 + +### kperf.PmuReadCpuFreqDetail + +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + err 
:= kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v\n", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for _, v := range freqList { + fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d\n", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} +``` + +### kperf.ResolvePmuDataSymbol + +func ResolvePmuDataSymbol(dataVo PmuDataVo) error 当SymbolMode不设置或者设置为0时,可通过该接口解析PmuRead返回的PmuData数据中的符号 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) + return + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + fmt.Printf("expect symbol data is empty, but is not\n") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v\n", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + fmt.Printf("expect symbol data is not empty, but is empty\n") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} ``` \ No newline at end of file diff --git a/docs/Python_API.md b/docs/Python_API.md index de323e92803839303f52b3a0be6bebc4ced7b6fe..ffebcb233e88243eef6d34d5e1ea3f5ffdaef5e9 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -82,8 +82,10 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) ```python # python代码示例 -import time import kperf +import ksym +import time + evtList = ["cycles", "branch-misses"] pmu_attr = 
kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -196,6 +198,7 @@ get_field(pmu_data: ImplPmuData, field_name: str, value: c_void_p) ```python import kperf +import ksym import time from ctypes import * @@ -272,8 +275,9 @@ kperf.trace_open(trace_type: kperf.PmuTraceType, pmu_trace_attr: kperf.PmuTraceA ```python # python代码示例 -import time import kperf +import time + funcs = ["read", "write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcs) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -335,17 +339,21 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int类型,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 ```python # python代码示例 +import kperf +import time dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) ] @@ -428,4 +436,61 @@ kperf.get_numa_core(numaId: int): 查询指定numaId下对应的core列表 # python代码示例 numaId = 1 numa_cores = kperf.get_numa_core(numaId) +``` + +### kperf.open_cpu_freq_sampling + +def open_cpu_freq_sampling(period: int) 开启cpu频率采集 + +### kperf.close_cpu_freq_sampling + +def close_cpu_freq_sampling() 关闭cpu频率采集 + +### kperf.read_cpu_freq_detail + +def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```python +#python代码示例 +import kperf +import time + +err = kperf.open_cpu_freq_sampling(100) +if err != 0: + 
print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +dataList = kperf.read_cpu_freq_detail() +for item in dataList.iter: + print(f"cpuId={item.cpuId} minFreq={item.minFreq} maxFreq={item.maxFreq} avgFreq={item.avgFreq}") + +kperf.close_cpu_freq_sampling() +``` + +### kperf.resolvePmuDataSymbol + +def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 +```python +#python代码示例 +import kperf +import time + +event_name = "cycles" +pmu_attr = kperf.PmuAttr( + evtList=[event_name], + sampleRate=1000, + callStack=True, + useFreq=True, +) +fd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if fd == -1: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.enable(fd) +time.sleep(1) +kperf.disable(fd) +pmu_data = kperf.read(fd) +err = kperf.resolvePmuDataSymbol(pmu_data) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.close(fd) ``` \ No newline at end of file diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 99fb52dfd420b8202f8ea0c23c1315c0f6adba6c..9d84290df8fd89ad64121caa865cf62ed142790d 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -245,12 +245,12 @@ var ( // PmuDeviceMetric var ( - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes/s PMU_DDR_READ_BW C.enum_PmuDeviceMetric = C.PMU_DDR_READ_BW - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes/s PMU_DDR_WRITE_BW C.enum_PmuDeviceMetric = C.PMU_DDR_WRITE_BW // Percore metric. @@ -265,8 +265,8 @@ var ( // Collect L3 total reference count, including miss and hit count. // Unit: count PMU_L3_REF C.enum_PmuDeviceMetric = C.PMU_L3_REF - // Pernuma metric. 
- // Collect L3 total latency for each numa node. + // Percluster metric. + // Collect L3 total latency for each cluster node. // Unit: cycles PMU_L3_LAT C.enum_PmuDeviceMetric = C.PMU_L3_LAT // Collect pcie rx bandwidth. @@ -284,6 +284,12 @@ var ( // Collect smmu address transaction. // Unit: count PMU_SMMU_TRAN C.enum_PmuDeviceMetric = C.PMU_SMMU_TRAN + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_NUMA + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_SOCKET ) // PmuBdfType @@ -728,6 +734,31 @@ func PmuDumpData(dataVo PmuDataVo, filePath string, dumpDwf bool) error { } return nil } + +// When symbol mode is SNO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +// param PmuDataVo the data from PmuRead +// return nil indicates resolve success, otherwise return error code +func ResolvePmuDataSymbol(dataVo PmuDataVo) error { + err := C.ResolvePmuDataSymbol(dataVo.cData) + if int(err) != 0 { + return errors.New(C.GoString(C.Perror())) + } + dataLen := len(dataVo.GoData) + ptr := unsafe.Pointer(dataVo.cData) + slice := reflect.SliceHeader { + Data: uintptr(ptr), + Len: dataLen, + Cap: dataLen, + } + cPmuDatas := *(*[]C.struct_PmuData)(unsafe.Pointer(&slice)) + for i := 0; i < dataLen; i++ { + dataObj := cPmuDatas[i] + if dataObj.stack != nil { + dataVo.GoData[i].appendSymbols(dataObj) + } + } + return nil +} // Initialize the trace collection target // On success, a trace collect task id is returned which is the unique identity for the task diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index 28848fe7ad2194045f04f95136127427eedad6e1..d06683a8fa91a8e376f034d23b211ced03b4063a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -287,3 +287,39 @@ func 
TestPmuGetCpuFreqDetail(t *testing.T) { kperf.PmuCloseCpuFreqSampling() } + +func TestResolvePmuDataSymbol(t *testing.T) { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + t.Fatalf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + t.Fatalf("kperf pmuread failed, expect err is nil, but is %v", err) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + t.Fatalf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + t.Fatalf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + t.Fatalf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} \ No newline at end of file diff --git a/include/pcerrc.h b/include/pcerrc.h index 259befacddaf35162596ff0f2f0a31bb0dcede1e..909ae4e9d8529d50677965d2f09e08a85f1432df 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -109,6 +109,7 @@ extern "C" { #define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 #define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 #define LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD 1067 +#define LIBPERF_ERR_PMU_DATA_NO_FOUND 1068 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index 5ec17265a809d0a122bb24d48e6d7f60331a143d..3d9d68487a20f3c805a7cade1525889982470e70 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -352,6 +352,14 @@ void PmuStop(int pd); */ int PmuRead(int pd, struct PmuData** pmuData); +/** +* @brief +* When symbol mode is NO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +* @param pmuData the data from PmuRead +* @return 0 indicates resolve success, otherwise 
return error code +*/ +int ResolvePmuDataSymbol(struct PmuData* pmuData); + /** * @brief * Append data list to another data list <*toData>. @@ -411,12 +419,12 @@ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *valu struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName); enum PmuDeviceMetric { - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes PMU_DDR_READ_BW, - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes PMU_DDR_WRITE_BW, // Percore metric. @@ -449,7 +457,13 @@ enum PmuDeviceMetric { // Perpcie metric. // Collect smmu address transaction. // Unit: count - PMU_SMMU_TRAN + PMU_SMMU_TRAN, + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA, + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. 
+ PMU_HHA_CROSS_SOCKET }; struct PmuDeviceAttr { diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt index c68bfe0d51db80c280cebbf74fb3b5efa980511b..4af6e76f9cdfc75afd4cdf8acaa2e4cd5b45ce64 100644 --- a/pmu/CMakeLists.txt +++ b/pmu/CMakeLists.txt @@ -31,8 +31,11 @@ include_directories(${SYMBOL_FILE_DIR}) include_directories(${PMU_DECODER_DIR}) ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf") target_link_libraries(kperf numa sym) target_compile_options(kperf PRIVATE -fPIC) install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) file(GLOB HEADER_FILES ${PROJECT_TOP_DIR}/include/*.h) install(FILES ${HEADER_FILES} DESTINATION ${CMAKE_INSTALL_PREFIX}/include) diff --git a/pmu/dummy_event.cpp b/pmu/dummy_event.cpp index 723a753342d3ecdbf61a70eeca44cad37b866d6e..9137db89afa8c9af634139b94b5da5c2b357be97 100644 --- a/pmu/dummy_event.cpp +++ b/pmu/dummy_event.cpp @@ -76,6 +76,7 @@ namespace KUNPENG_PMU { if (forkPidQueue.empty()) { continue; } + std::lock_guard lg(dummyMutex); auto& pid = forkPidQueue.front(); for (const auto& evtList: evtLists) { auto groupId = evtList->GetGroupId(); @@ -83,7 +84,6 @@ namespace KUNPENG_PMU { DummyContext ctx = {evtList, static_cast(pid), evtGroupInfo.first, evtGroupInfo.second}; forkStrategy.DoHandler(ctx, evtGroupInfo.first, evtGroupInfo.second); } - std::lock_guard lg(dummyMutex); forkPidQueue.pop(); } }); @@ -146,7 +146,9 @@ namespace KUNPENG_PMU { if (header->type == PERF_RECORD_FORK) { auto sample = (KUNPENG_PMU::PerfRecordFork*) header; std::lock_guard lg(dummyMutex); - forkPidQueue.push(sample->tid); + if((uint8_t*)page + MAP_LEN > ringBuf + off + sizeof(KUNPENG_PMU::PerfRecordFork)) { + forkPidQueue.push(sample->tid); + } } if (header->type == PERF_RECORD_EXIT) { auto 
sample = (KUNPENG_PMU::PerfRecordFork*) header; diff --git a/pmu/dummy_event.h b/pmu/dummy_event.h index 04686773d07c35f6f3015b980032163b596fe712..dc25970c37ea255644e1790d94a279d7c0313e16 100644 --- a/pmu/dummy_event.h +++ b/pmu/dummy_event.h @@ -65,7 +65,7 @@ namespace KUNPENG_PMU { std::thread dummyThread; std::thread consumeThread; - std::atomic dummyFlag; + volatile std::atomic dummyFlag; std::vector>& evtLists; std::vector ppids; diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index 2af3e2f44451fc93e716e6f4e8326bd2c3c7c908..73ca53a2a8d28194b337734526b6368555ce5c6a 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -97,10 +97,10 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrIsMainPid()) { continue; } + if (err == LIBPERF_ERR_INVALID_EVENT) { if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", PMU Hardware or event type doesn't support branch stack sampling"); @@ -108,6 +108,16 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrGetEvtName() + ", " + std::string{strerror(errno)}); } } + + if (err == LIBPERF_ERR_NO_PERMISSION) { + pcerr::SetCustomErr(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event." 
+ "Swtich to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); + } + + if (err == UNKNOWN_ERROR) { + pcerr::SetCustomErr(err, std::string{strerror(errno)}); + } + return err; } fdList.insert(perfEvt->GetFd()); @@ -171,6 +181,9 @@ void KUNPENG_PMU::EvtList::FillFields( int KUNPENG_PMU::EvtList::Read(vector& data, std::vector& sampleIps, std::vector& extPool, std::vector& switchData) { + + std::unique_lock lg(mutex); + for (unsigned int row = 0; row < numCpu; row++) { for (unsigned int col = 0; col < numPid; col++) { int err = this->xyCounterArray[row][col]->BeginRead(); @@ -240,13 +253,15 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons return; } std::unique_lock lock(mutex); + this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); + bool hasInitErr = false; + std::map perfEvtMap; for (unsigned int row = 0; row < numCpu; row++) { - this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); - procMap[pid] = this->pidList.back(); PerfEvtPtr perfEvt = this->MapPmuAttr(this->cpuList[row]->coreId, this->pidList.back()->tid, this->pmuEvt.get()); if (perfEvt == nullptr) { - return; + hasInitErr = true; + break; } perfEvt->SetSymbolMode(symMode); perfEvt->SetBranchSampleFilter(branchSampleFilter); @@ -258,22 +273,36 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons err = perfEvt->Init(groupEnable, -1, -1); } if (err != SUCCESS) { - return; + hasInitErr = true; + break; } - fdList.insert(perfEvt->GetFd()); + perfEvtMap.emplace(row, perfEvt); + } + + if (!hasInitErr) { + procMap[pid] = this->pidList.back(); numPid++; - this->xyCounterArray[row].emplace_back(perfEvt); - /** - * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, - * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt - * may be disable. At this time No need to collect counts. 
- */ - if (evtStat == ENABLE || evtStat == START) { - perfEvt->Enable(); + for (unsigned int row = 0; row < numCpu; row++) { + auto perfEvt = perfEvtMap[row]; + fdList.insert(perfEvt->GetFd()); + this->xyCounterArray[row].emplace_back(perfEvt); + /** + * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, + * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt + * may be disable. At this time No need to collect counts. + */ + if (evtStat == ENABLE || evtStat == START) { + perfEvt->Enable(); + } + if (evtStat == READ && prevStat != DISABLE) { + perfEvt->Enable(); + } } - if (evtStat == READ && prevStat != DISABLE) { - perfEvt->Enable(); + } else { + for (const auto& evtPtr : perfEvtMap) { + close(evtPtr.second->GetFd()); } + this->pidList.erase(this->pidList.end() - 1); } } @@ -297,7 +326,7 @@ void KUNPENG_PMU::EvtList::ClearExitFd() int pid = it->get()->GetPid(); if (exitPidVet.find(pid) != exitPidVet.end()) { int fd = it->get()->GetFd(); - this->fdList.erase(fd); + this->fdList.erase(this->fdList.find(fd)); close(fd); it = perfVet.erase(it); continue; diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 1fd80eaffe8441a4bc91de391a70a0c1764702df..4ffd5d72e510bdd779303808826ed1042cdd0ce0 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -427,6 +427,7 @@ static void PmuTaskAttrFree(PmuTaskAttr *taskAttr) int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr) { SetWarn(SUCCESS); + New(SUCCESS); PmuAttr copiedAttr = *attr; pair previousEventList = {0, nullptr}; try { @@ -769,6 +770,11 @@ int PmuRead(int pd, struct PmuData** pmuData) } } +int ResolvePmuDataSymbol(struct PmuData* pmuData) +{ + return PmuList::GetInstance()->ResolvePmuDataSymbol(pmuData); +} + void PmuClose(int pd) { SetWarn(SUCCESS); @@ -846,6 +852,9 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } else { pmuEvt = GetPmuEvent(evtName, collectType); if (pmuEvt == 
nullptr) { + if (Perrorno() != SUCCESS) { + return nullptr; + } #ifdef IS_X86 New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName) + ";x86 just supports core event and raw event"); #else diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index ab83ffdd1bd9afe1cea7cc4fcc968bdbbfde82a3..547ccb2dbb735453701a8c15f66cb2d3c089a6d6 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -101,24 +101,6 @@ static void GetTraceSubFolder(const std::string& traceFolder, const string& devN closedir(dir); } -static bool PerfEventSupported(__u64 type, __u64 config) -{ - perf_event_attr attr{}; - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(struct perf_event_attr); - attr.type = type; - attr.config = config; - attr.disabled = 1; - attr.inherit = 1; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); - if (fd < 0) { - return false; - } - close(fd); - return true; -} - const char** QueryCoreEvent(unsigned *numEvt) { if (!coreEventList.empty()) { @@ -128,9 +110,6 @@ const char** QueryCoreEvent(unsigned *numEvt) auto coreEventMap = KUNPENG_PMU::CORE_EVENT_MAP.at(GetCpuType()); for (auto& pair : coreEventMap) { auto eventName = pair.first; - if (!PerfEventSupported(pair.second.type, pair.second.config)) { - continue; - } char* eventNameCopy = new char[eventName.length() + 1]; strcpy(eventNameCopy, eventName.c_str()); coreEventList.emplace_back(eventNameCopy); @@ -203,6 +182,11 @@ const char** QueryTraceEvent(unsigned *numEvt) struct dirent *entry; const string &traceFolder = GetTraceEventDir(); if (traceFolder.empty()) { + if (errno == EACCES) { + New(LIBPERF_ERR_NO_PERMISSION, "no permission to access '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } else { + New(LIBPERF_ERR_INVALID_EVENT, "can't find '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } return traceEventList.data(); } 
DIR *dir = opendir(traceFolder.c_str()); @@ -282,7 +266,6 @@ const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt) New(LIBPERF_ERR_QUERY_EVENT_LIST_FAILED, "Query event failed."); return nullptr; } - New(SUCCESS); return eventList; } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index c0470c0b366dc24933b29e6b8acadd9907ba65f9..d82e33e099c5e669f9eccf3f4c9a121b98e7ceac 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -413,12 +413,14 @@ namespace KUNPENG_PMU { } auto findToData = userDataList.find(*toData); - if (findFromData == userDataList.end()) { + if (findToData == userDataList.end()) { return LIBPERF_ERR_INVALID_PMU_DATA; } // For non-null target data list, append source list to end of target vector. auto& dataVec = findToData->second.data; + auto& ipsVec = findToData->second.sampleIps; dataVec.insert(dataVec.end(), findFromData->second.data.begin(), findFromData->second.data.end()); + ipsVec.insert(ipsVec.end(), findFromData->second.sampleIps.begin(), findFromData->second.sampleIps.end()); len = dataVec.size(); if (*toData != dataVec.data()) { @@ -442,6 +444,7 @@ namespace KUNPENG_PMU { void PmuList::Close(const int pd) { + EraseDummyEvent(pd); auto evtList = GetEvtList(pd); for (auto item: evtList) { item->Close(); @@ -453,7 +456,6 @@ namespace KUNPENG_PMU { EraseDataEvtGroupList(pd); RemoveEpollFd(pd); EraseSpeCpu(pd); - EraseDummyEvent(pd); EraseParentEventMap(pd); SymResolverDestroy(); PmuEventListFree(); @@ -625,9 +627,6 @@ namespace KUNPENG_PMU { void PmuList::FillStackInfo(EventData& eventData) { auto symMode = symModeList[eventData.pd]; - if (symMode == NO_SYMBOL_RESOLVE) { - return; - } // Parse dwarf and elf info of each pid and get stack trace for each pmu data. 
for (size_t i = 0; i < eventData.data.size(); ++i) { auto& pmuData = eventData.data[i]; @@ -636,13 +635,48 @@ namespace KUNPENG_PMU { SymResolverRecordModuleNoDwarf(pmuData.pid); } else if (symMode == RESOLVE_ELF_DWARF) { SymResolverRecordModule(pmuData.pid); + } else if (symMode == NO_SYMBOL_RESOLVE) { + SymResolverRecordModule(pmuData.pid); + continue; } else { continue; } + if (pmuData.stack == nullptr) { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } + //Exceptions generated by the symbol interface are not directly exposed and are processed as warnings. + int err = Perrorno(); + if (err < LIBPERF_ERR_NO_AVAIL_PD && err >= LIBSYM_ERR_BASE) { + pcerr::SetWarn(err, Perror()); + New(SUCCESS); + } + } + + int PmuList::ResolvePmuDataSymbol(struct PmuData* iPmuData) + { + if (iPmuData == nullptr) { + New(LIBPERF_ERR_INVALID_PMU_DATA, "ipmuData is nullptr"); + return LIBPERF_ERR_INVALID_PMU_DATA; + } + auto userData = userDataList.find(iPmuData); + if (userData == userDataList.end()) { + New(LIBPERF_ERR_PMU_DATA_NO_FOUND, "ipmuData isn't in userDataList"); + return LIBPERF_ERR_PMU_DATA_NO_FOUND; + } + + auto& eventData = userDataList[iPmuData]; + auto symMode = symModeList[eventData.pd]; + for (size_t i = 0; i < eventData.data.size(); ++i) { + auto& pmuData = eventData.data[i]; + auto& ipsData = eventData.sampleIps[i]; + if (pmuData.stack == nullptr) { + pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); + } + } + New(SUCCESS); + return SUCCESS; } void PmuList::AggregateData(const std::vector& evData, std::vector& newEvData) @@ -1025,9 +1059,6 @@ namespace KUNPENG_PMU { int PmuList::InitSymbolRecordModule(const unsigned pd, PmuTaskAttr* taskParam) { SymbolMode symMode = GetSymbolMode(pd); - if (symMode == NO_SYMBOL_RESOLVE) { - return SUCCESS; - } if (taskParam->pmuEvt->collectType == COUNTING) { return SUCCESS; @@ -1053,7 +1084,7 @@ namespace KUNPENG_PMU { } } - if (this->symModeList[pd] == 
RESOLVE_ELF_DWARF) { + if (this->symModeList[pd] == RESOLVE_ELF_DWARF || this->symModeList[pd] == NO_SYMBOL_RESOLVE) { for (const auto& pid: pidList) { int rt = SymResolverRecordModule(pid); if (rt != SUCCESS) { diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index b44ff55e50927db9c8ca322078a2fc5fd90ee4ee..523e2ad96b18c34f34fa99665897aba9054ff56c 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -76,6 +76,7 @@ public: void StoreSplitData(unsigned pd, std::pair& previousEventList, std::unordered_map& eventSplitMap); bool IsAllPidExit(const unsigned pd); + int ResolvePmuDataSymbol(struct PmuData* iPmuData); private: using ProcPtr = std::shared_ptr; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 8fc5d12c6bbeb6a6e31c4be182ca210021a77a9e..1fe04d7ee57576f730022bf1caabfa5719aaf5d8 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -38,6 +38,7 @@ using namespace std; using namespace pcerr; +using IdxMap = unordered_map>; static unsigned maxCpuNum = 0; static vector coreArray; @@ -83,11 +84,13 @@ namespace KUNPENG_PMU { {PmuDeviceMetric::PMU_PCIE_RX_MWR_BW, "PMU_PCIE_RX_MWR_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MRD_BW, "PMU_PCIE_TX_MRD_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MWR_BW, "PMU_PCIE_TX_MWR_BW"}, - {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"} + {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"}, + {PmuDeviceMetric::PMU_HHA_CROSS_NUMA, "PMU_HHA_CROSS_NUMA"}, + {PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, "PMU_HHA_CROSS_SOCKET"}, }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_L3_LAT}; + set pernumaMetric = {PMU_HHA_CROSS_NUMA, PMU_HHA_CROSS_SOCKET}; set perClusterMetric = {PMU_L3_LAT}; set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, @@ -268,6 +271,30 @@ namespace KUNPENG_PMU { 2 } }; + + PMU_METRIC_PAIR HHA_CROSS_NUMA = { + PmuDeviceMetric::PMU_HHA_CROSS_NUMA, + { + "hisi_sccl", + "hha", + {"0x0", "0x02"}, + "", + "", + 0 + } + }; + + PMU_METRIC_PAIR 
HHA_CROSS_SOCKET = { + PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, + { + "hisi_sccl", + "hha", + {"0x0", "0x01"}, + "", + "", + 0 + } + }; } static const map HIP_A_UNCORE_METRIC_MAP { @@ -277,6 +304,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::L3_MISS, METRIC_CONFIG::L3_REF, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; static const map HIP_B_UNCORE_METRIC_MAP { @@ -291,6 +320,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::PCIE_TX_MRD_BW, METRIC_CONFIG::PCIE_TX_MWR_BW, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; const UNCORE_METRIC_MAP UNCORE_METRIC_CONFIG_MAP = { @@ -851,7 +882,7 @@ namespace KUNPENG_PMU { } // remove duplicate device attribute - static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr, bool l3ReDup) + static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr) { std::unordered_set uniqueSet; for (int i = 0; i < len; ++i) { @@ -863,17 +894,6 @@ namespace KUNPENG_PMU { } if (uniqueSet.find(key) == uniqueSet.end()) { - // when in deviceopen remove the same PMU_L3_TRAFFIC and PMU_L3_REF, - // but when getDevMetric we need to keep them. 
- if (l3ReDup == true && - (attr[i].metric == PmuDeviceMetric::PMU_L3_TRAFFIC || attr[i].metric == PmuDeviceMetric::PMU_L3_REF)) { - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_TRAFFIC)) != uniqueSet.end()) { - continue; - } - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_REF)) != uniqueSet.end()) { - continue; - } - } uniqueSet.insert(key); deviceAttr.emplace_back(attr[i]); } @@ -956,6 +976,9 @@ namespace KUNPENG_PMU { case PMU_PCIE_TX_MWR_BW: case PMU_SMMU_TRAN: return PMU_METRIC_BDF; + case PMU_HHA_CROSS_NUMA: + case PMU_HHA_CROSS_SOCKET: + return PMU_METRIC_NUMA; } return PMU_METRIC_INVALID; } @@ -976,25 +999,57 @@ namespace KUNPENG_PMU { int AggregateByNuma(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { - map devDataByNuma; + const auto& deviceConfig = GetDeviceMtricConfig(); + const auto& findConfig = deviceConfig.find(metric); + if (findConfig == deviceConfig.end()) { + return SUCCESS; + } + auto &evts = findConfig->second.events; + if (evts.size() != 2) { + return SUCCESS; + } + // Event name for total access count. + string totalEvt = evts[0]; + // Event name for cross-numa/cross-socket count. + string crossEvt = evts[1]; + // Sort data by numa, and then sort by event string. 
+ map> devDataByNuma; for (auto &data : rawData) { + string devName; + string evtName; + if (!GetDeviceName(data.evtName, devName, evtName)) { + continue; + } + auto evtConfig = ExtractEvtStr("config", evtName); auto findData = devDataByNuma.find(data.numaId); if (findData == devDataByNuma.end()) { - PmuDeviceData outData; - outData.metric = data.metric; - outData.count = data.count; - outData.mode = GetMetricMode(data.metric); - outData.numaId = data.numaId; - devDataByNuma[data.numaId] = outData; + devDataByNuma[data.numaId][evtConfig] = data; } else { - findData->second.count += data.count; + devDataByNuma[data.numaId][evtConfig].count += data.count; } } for (auto &data : devDataByNuma) { - devData.push_back(data.second); + // Get events of cross-numa/cross-socket access count and total access count. + auto findcrossData = data.second.find(crossEvt); + auto findtotalData = data.second.find(totalEvt); + if (findcrossData == data.second.end() || findtotalData == data.second.end()) { + continue; + } + // Compute ratio: cross access count / total access count + double ratio = 0.0; + if (findtotalData->second.count != 0) { + ratio = (double)(findcrossData->second.count) / findtotalData->second.count; + } else { + ratio = -1; + } + PmuDeviceData outData; + outData.metric = metric; + outData.count = ratio; + outData.mode = GetMetricMode(metric); + outData.numaId = data.first; + devData.push_back(outData); } - return SUCCESS; } @@ -1071,11 +1126,34 @@ namespace KUNPENG_PMU { return SUCCESS; } - static unordered_map> DDRC_CHANNEL_MAP = { - {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, - {CHIP_TYPE::HIPB, {0, 2, 3, 5}} + static IdxMap DDRC_CHANNEL_MAP_HIPA = { + {1, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {3, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, + {5, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {7, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, + }; + static IdxMap DDRC_CHANNEL_MAP_HIPB = { + {3, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {1, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + {11, {{0, 0}, {2, 1}, {3, 2}, {5, 
3}}}, + {9, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + }; + + static unordered_map DDRC_CHANNEL_MAP = { + {HIPA, DDRC_CHANNEL_MAP_HIPA}, + {HIPB, DDRC_CHANNEL_MAP_HIPB}, }; + static int ParseDDRIdx(const string &devName, const string prefix) + { + size_t ddrcPos = devName.find(prefix); + size_t channelIndex = ddrcPos + prefix.length(); + string ddrcIndexStr = devName.substr(channelIndex); + size_t separatorPos = ddrcIndexStr.find("_"); + int ddrcIndex = separatorPos != string::npos ? stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + return ddrcIndex; + } + static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) { string devName; @@ -1084,28 +1162,22 @@ namespace KUNPENG_PMU { return false; } // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 - string ddrcStr = "ddrc"; - size_t ddrcPos = devName.find(ddrcStr); - size_t channelIndex = ddrcPos + ddrcStr.length(); - string ddrcIndexStr = devName.substr(channelIndex); - // find index in DDRC_CHANNEL_MAP. eg: 3_1 --> 3, corresponds to channel 2 in HIPB - size_t separatorPos = ddrcIndexStr.find("_"); - int ddrcIndex = separatorPos != string::npos ? 
stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + int ddrcIndex = ParseDDRIdx(devName, "ddrc"); + int scclIndex = ParseDDRIdx(devName, "sccl"); - unsigned channelAddNum = 0; - if((ddrNumaId & 1) == 1) { // channel id + 4 in sequence - channelAddNum = 4; - } CHIP_TYPE chipType = GetCpuType(); //get channel index if (DDRC_CHANNEL_MAP.find(chipType) == DDRC_CHANNEL_MAP.end()) { return false; } - auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; - auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); - if (it != ddrcChannelList.end()) { - size_t index = distance(ddrcChannelList.begin(), it); - channelId = index + channelAddNum; - return true; + + auto &ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; + auto ddrIdxMap = ddrcChannelList.find(scclIndex); + if (ddrIdxMap != ddrcChannelList.end()) { + auto channelIdx = ddrIdxMap->second.find(ddrcIndex); + if (channelIdx != ddrIdxMap->second.end()) { + channelId = channelIdx->second; + return true; + } } return false; } @@ -1136,7 +1208,7 @@ namespace KUNPENG_PMU { outData.mode = GetMetricMode(data.metric); outData.channelId = channelId; outData.ddrNumaId = data.ddrNumaId; - outData.socketId = data.ddrNumaId < 2 ? 
0 : 1; // numa id 0-1 --> socket id 0; numa id 2-3 --> socket id 1 + outData.socketId = data.socketId; devDataByChannel[ddrDatakey] = outData; } else { findData->second.count += data.count; @@ -1246,6 +1318,8 @@ namespace KUNPENG_PMU { {PMU_PCIE_TX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_TX_MWR_BW, PcieBWAggregate}, {PMU_SMMU_TRAN, SmmuTransAggregate}, + {PMU_HHA_CROSS_NUMA, AggregateByNuma}, + {PMU_HHA_CROSS_SOCKET, AggregateByNuma}, }; static bool IsMetricEvent(const string &devName, const string &evtName, const PmuDeviceAttr &devAttr) @@ -1348,7 +1422,7 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } - if (perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + if (perChannelMetric.find(devAttr.metric) != perChannelMetric.end()) { devData.ddrNumaId = pmuData[i].cpuTopo->numaId; devData.socketId = pmuData[i].cpuTopo->socketId; } @@ -1436,7 +1510,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) } // Remove duplicate device attributes. vector deviceAttr; - if (RemoveDupDeviceAttr(attr, len, deviceAttr, true) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, len, deviceAttr) != SUCCESS) { return -1; } vector configEvtList; @@ -1448,8 +1522,17 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) configEvtList.insert(configEvtList.end(), temp.begin(), temp.end()); } - vector evts; + //remove the same event of PMU_L3_TRAFFIC and PMU_L3_REF, PMU_HHA_CROSS_NUMA and PMU_HHA_CROSS_SOCKET + unordered_set tmpEvents; + vector filteredEvtList; for (auto& evt : configEvtList) { + if (tmpEvents.find(evt) == tmpEvents.end()) { + tmpEvents.insert(evt); + filteredEvtList.push_back(evt); + } + } + vector evts; + for (auto& evt : filteredEvtList) { evts.push_back(const_cast(evt.c_str())); } @@ -1501,7 +1584,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, } // Remove duplicate device attributes. 
vector deviceAttr; - if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr, false) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr) != SUCCESS) { return -1; } // Filter pmuData by metric and generate InnerDeviceData, @@ -1565,7 +1648,7 @@ int64_t PmuGetCpuFreq(unsigned core) cpuPath << SYS_CPU_INFO_PATH << core << "/cpufreq/scaling_cur_freq"; if (!ExistPath(cpuPath.str())) { - New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq Or core exceed cpuNums. Not exist " + cpuPath.str()); + New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq or core exceed cpuNums. Not exist " + cpuPath.str()); return -1; } std::string curFreqStr = ReadFileContent(cpuPath.str()); diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index a12e7096ad040a0d361de39b7a9b7f3b21883146..cab155621d5ddf0b295aad7042d2a558b70c0ce8 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -217,22 +217,20 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; - if (symMode != NO_SYMBOL_RESOLVE) { - // Copy ips from ring buffer and get stack info later. - if (evt->callStack == 0) { - int i = 0; - while (i < sample->nr && !IsValidIp(sample->ips[i])) { - i++; - } - if (i < sample->nr) { + // Copy ips from ring buffer and get stack info later. 
+ if (evt->callStack == 0) { + int i = 0; + while (i < sample->nr && !IsValidIp(sample->ips[i])) { + i++; + } + if (i < sample->nr) { + ips->ips.push_back(sample->ips[i]); + } + } else { + for (int i = sample->nr - 1; i >= 0; --i) { + if (IsValidIp(sample->ips[i])) { ips->ips.push_back(sample->ips[i]); } - } else { - for (int i = sample->nr - 1; i >= 0; --i) { - if (IsValidIp(sample->ips[i])) { - ips->ips.push_back(sample->ips[i]); - } - } } } current->cpu = sample->cpu; diff --git a/pmu/spe.cpp b/pmu/spe.cpp index 0d1c7d9ceda7f8d30419b2a6cdbd86259853bebd..2bc42c400bb7ba277a6da589d52262e2d5f06ea8 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -359,9 +359,9 @@ void Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData uint64_t off = dataTail % mpage->data_size; struct perf_event_header *header = (struct perf_event_header *)(ringBuf + off); - if (header->type == PERF_RECORD_MMAP && symbolMode != NO_SYMBOL_RESOLVE) { + if (header->type == PERF_RECORD_MMAP) { struct PerfRecordMmap *sample = (struct PerfRecordMmap *)header; - if (symbolMode == RESOLVE_ELF_DWARF) { + if (symbolMode == RESOLVE_ELF_DWARF || symbolMode == NO_SYMBOL_RESOLVE) { int ret = SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); if (ret != SUCCESS) { // if the module fails to be updated, a warning is recorded to overwrite the failure error code. 
diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index c65413e7b94a253bff84a26b134f7cebabf5cc65..2cffe19b5da9c202b6b127743cda904d09f3d855 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1619,6 +1619,15 @@ def PmuRead(pd: int) -> PmuData: c_data_len = c_PmuRead(c_pd, ctypes.byref(c_data_pointer)) return PmuData(c_data_pointer, c_data_len) +def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: + """ + int ResolvePmuDataSymbol(struct PmuData* pmuData); + """ + c_ResolvePmuDataSymbol = kperf_so.ResolvePmuDataSymbol + c_ResolvePmuDataSymbol.argtypes = [ctypes.POINTER(CtypesPmuData)] + c_ResolvePmuDataSymbol.restype = ctypes.c_int + + return c_ResolvePmuDataSymbol(pmuData) def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), toData: ctypes.POINTER(ctypes.POINTER(CtypesPmuData))) -> int: @@ -2086,4 +2095,5 @@ __all__ = [ 'PmuReadCpuFreqDetail', 'PmuCloseCpuFreqSampling', 'PmuCpuFreqDetail', + 'ResolvePmuDataSymbol' ] diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 4ba96b21ba7259c6c8a181b2516af745a90316bf..20776de6a05a9301ac787d816e54c528b6bd9ac1 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -108,6 +108,7 @@ class Error: LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD = 1067 + LIBPERF_ERR_PMU_DATA_NO_FOUND = 1068 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 672a060b43f9e2096d3d036d930b3738f4df0619..213125d81941bda7e0e571afd4efc7f3c8580f7b 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -107,12 +107,12 @@ class SymbolMode: RESOLVE_ELF_DWARF = 2 # Resolve elf and dwarf. All fields in Symbol will be valid. class PmuDeviceMetric: - # Pernuma metric. - # Collect ddr read bandwidth for each numa node. + # Perchannel metric. 
+ # Collect ddr read bandwidth for each channel. # Unit: Bytes/s PMU_DDR_READ_BW = 0 - # Pernuma metric. - # Collect ddr write bandwidth for each numa node. + # Perchannel metric. + # Collect ddr write bandwidth for each channel. # Unit: Bytes/s PMU_DDR_WRITE_BW = 1 # Percore metric. @@ -127,8 +127,8 @@ class PmuDeviceMetric: # Collect L3 total reference count, including miss and hit count. # Unit: count PMU_L3_REF = 4 - # Pernuma metric. - # Collect L3 total latency for each numa node. + # Percluster metric. + # Collect L3 total latency for each cluster node. # Unit: cycles PMU_L3_LAT = 5 # Collect pcie rx bandwidth. @@ -146,6 +146,12 @@ class PmuDeviceMetric: # Collect smmu address transaction. # Unit: count PMU_SMMU_TRAN = 10 + # Pernuma metric. + # Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA = 11 + # Pernuma metric. + # Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET = 12 class PmuDeviceAttr(_libkperf.PmuDeviceAttr): """ @@ -390,6 +396,14 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) +def resolvePmuDataSymbol(pmuData: PmuData) -> int: + """ + when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols + :param: pmuData + :return: pmu data + """ + return _libkperf.ResolvePmuDataSymbol(pmuData.pointer()) + def stop(pd: int) -> None: """ @@ -595,4 +609,5 @@ __all__ = [ 'open_cpu_freq_sampling', 'close_cpu_freq_sampling', 'read_cpu_freq_detail', + 'resolvePmuDataSymbol' ] diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index bf653ce808d95364bc75264ed3ad0a2cda85915a..90c254be317d202ad78bd4a0523b726996d61d92 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -256,6 +256,28 @@ def test_get_metric_smmu_transaction(): print_dev_data_details(dev_data) kperf.close(pd) +def test_collect_hha_cross(): + dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET), + 
kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA) + ] + pd = kperf.device_open(dev_attr) + print(kperf.error()) + assert pd != -1, f"Expected non-negative pd, but got {pd}" + kperf.enable(pd) + time.sleep(1) + kperf.disable(pd) + ori_data = kperf.read(pd) + assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" + + dev_data = kperf.get_device_metric(ori_data, dev_attr) + assert dev_data[0].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[-1].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA + assert dev_data[-1].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + print_dev_data_details(dev_data) + kperf.close(pd) + if __name__ == '__main__': # 提示用户使用pytest 运行测试文件 print("This is a pytest script. Run it using the 'pytest' command.") diff --git a/symbol/CMakeLists.txt b/symbol/CMakeLists.txt index 920d59e34034f97592b4676276cbf76ce099a689..aaa8988e1e967e5d72bd7c9f6f503bbbc975ab34 100644 --- a/symbol/CMakeLists.txt +++ b/symbol/CMakeLists.txt @@ -16,6 +16,9 @@ include_directories(${INCLUDE_DIR}) message(${THIRD_PARTY}/elfin-parser/elf) ADD_LIBRARY(sym SHARED ${SYMBOL_SRC}) +ADD_LIBRARY(sym_static STATIC ${SYMBOL_SRC}) +set_target_properties(sym_static PROPERTIES OUTPUT_NAME "sym") target_link_libraries(sym elf_static dwarf_static pthread) install(TARGETS sym DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS sym_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) install(FILES ${SYMBOL_FILE_DIR}/symbol.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include) diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 46f6ad5c515400fa40297563e4a1b24f3a17db86..2f971f402e24961ef9b1fb126f6ca62621a239d3 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "name_resolve.h" #include "pcerr.h" #include "symbol_resolve.h" @@ -70,16 +71,15 @@ namespace { flag = 
false; } - static inline bool CheckIfFile(std::string mapline) + static inline bool CheckIfFile(const std::string& mapline) { - return (!((mapline.find(HUGEPAGE) != std::string::npos) || (mapline.find(DEV_ZERO) != std::string::npos) || - (mapline.find(ANON) != std::string::npos) || (mapline.find(STACK) != std::string::npos) || - (mapline.find(SOCKET) != std::string::npos) || (mapline.find(VSYSCALL) != std::string::npos) || - (mapline.find(HEAP) != std::string::npos) || (mapline.find(VDSO) != std::string::npos) || - (mapline.find(SYSV) != std::string::npos) || (mapline.find(VVAR) != std::string::npos)) && - (mapline.find(R_XP) != std::string::npos)) - ? true - : false; + const std::vector patterns = {HUGEPAGE, DEV_ZERO, ANON, STACK, SOCKET, VSYSCALL, HEAP ,VDSO, SYSV, VVAR}; + for (const auto& pattern :patterns) { + if (mapline.find(pattern) != std::string::npos) { + return false; + } + } + return mapline.find(R_XP) != std::string::npos; } static inline char* InitChar(int len) @@ -398,7 +398,7 @@ bool MyElf::IsExecFile() void MyElf::Emplace(unsigned long addr, const ELF_SYM& elfSym) { - this->symTab.insert({addr, elfSym}); + this->symTab.emplace(addr, elfSym); } ELF_SYM* MyElf::FindSymbol(unsigned long addr) @@ -548,15 +548,11 @@ int SymbolResolve::RecordModule(int pid, RecordModuleType recordModuleType) moduleSafeHandler.releaseLock(pid); return 0; } - char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -588,15 +584,11 
@@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) return SUCCESS; } // Get memory maps of pid. - char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -618,8 +610,8 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) this->RecordDwarf(item->moduleName.c_str()); } } - for (auto mod : diffModVec) { - oldModVec.push_back(mod); + for (auto& mod : diffModVec) { + oldModVec.emplace_back(mod); } pcerr::New(SUCCESS); moduleSafeHandler.releaseLock(pid); @@ -1176,11 +1168,13 @@ std::vector> SymbolResolve::FindDiffMaps( const std::vector>& newMaps) const { std::vector> diffMaps; + std::set oldStarts; + for (const auto& oldMod : oldMaps) { + oldStarts.insert(oldMod->start); + } for (auto newMod : newMaps) { - for (auto oldMod : oldMaps) { - if (newMod->start != oldMod->start) { - diffMaps.push_back(newMod); - } + if (oldStarts.find(newMod->start) == oldStarts.end()) { + diffMaps.emplace_back(newMod); } } diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index d10ca39265bd6c9a9ffacb35661be5085b9c7d0f..68710cb5a9815368bf0151ac0712fccc3f45b43e 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -309,4 +309,29 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); +} + +TEST_F(TestMetric, GetMetricHHACross) +{ + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = 
PMU_HHA_CROSS_NUMA; + devAttr[1].metric = PMU_HHA_CROSS_SOCKET; + int pd = PmuDeviceOpen(devAttr, 2); + ASSERT_NE(pd, -1); + PmuEnable(pd); + sleep(1); + PmuDisable(pd); + PmuData* oriData = nullptr; + int oriLen = PmuRead(pd, &oriData); + ASSERT_NE(oriLen, -1); + + PmuDeviceData *devData = nullptr; + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + ASSERT_EQ(devData[0].metric, PMU_HHA_CROSS_NUMA); + ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); + ASSERT_EQ(devData[len - 1].metric, PMU_HHA_CROSS_SOCKET); + ASSERT_EQ(devData[len - 1].mode, PMU_METRIC_NUMA); + DevDataFree(devData); + PmuDataFree(oriData); + PmuClose(pd); } \ No newline at end of file diff --git a/util/common.h b/util/common.h index 1c3bcb7635f0df557659248418a879db8dd6810d..77b8a9b6c33052f98b59d040ebbf9f10d7a4844b 100644 --- a/util/common.h +++ b/util/common.h @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef __x86_64__ #define IS_X86 1