diff --git a/CMakeLists.txt b/CMakeLists.txt index a96e84866701aae052bdf00107ce1d3a2ac9c4b3..f013847e946f13209dc6488db54862bd1f5294e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,14 @@ endif() set(TOP_DIR ${PROJECT_SOURCE_DIR}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops -fno-plt") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funroll-loops -fno-plt ") + +if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a") +endif() + message("TOP_DIR is ${TOP_DIR}") include(${CMAKE_CURRENT_LIST_DIR}/Common.cmake) add_subdirectory(symbol) diff --git a/README.en.md b/README.en.md index 234f2cc0f154fa11903ef296e3c6f9f15bae4855..1ef5287ec9ba9c7c6e3b4456d08231f835a88c95 100644 --- a/README.en.md +++ b/README.en.md @@ -127,82 +127,106 @@ Here are some examples: * Get pmu count for a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -int pd = PmuOpen(COUNTING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - ... +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. 
+ PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen; a pmu descriptor pd is returned. + // pd is an identity for the current task. + int pd = PmuOpen(COUNTING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << " count=" << d->count << std::endl; + } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. + PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Sample a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -// Use SAMPLING for sample task. -int pd = PmuOpen(SAMPLING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // Get an element from array. - PmuData *d = &data[i]; - // Get stack object which is a linked list. - Stack *stack = d->stack; - while (stack) { - // Get symbol object. - if (stack->symbol) { - ... 
+#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen and pmu descriptor is return. + // is an identity for current task. + // Use SAMPLING for sample task. + int pd = PmuOpen(SAMPLING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // Get an element from array. + PmuData *d = &data[i]; + // Get stack object which is a linked list. + Stack *stack = d->stack; + while (stack) { + // Get symbol object. + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + } + stack = stack->next; } - stack = stack->next; } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. + PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Python examples @@ -291,6 +315,13 @@ python example.py You can directly go to the go/src/libkperf_test directory. 
```shell +export GO111MODULE=off +export LD_LIBRARY_PATH=../libkperf/lib:$LD_LIBRARY_PATH go test -v # run all go test -v -test.run TestCount #specify the test case to run ``` + +* **GO language static mode compilation:** +```shell +go build -tags="static" +``` \ No newline at end of file diff --git a/README.md b/README.md index 555c062d73d10d84407df11ba1493d38f3a03b23..9df2dec48770ed6ab582eb084726da6f726d0fe5 100644 --- a/README.md +++ b/README.md @@ -107,77 +107,101 @@ Go API文档可以参考GO_API.md: - 获取进程的pmu计数 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(COUNTING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - PmuData *d = &data[i]; - ... 
+#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pidList[1]; + pidList[0] = getpid(); + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(COUNTING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << "count=" << d->count << std::endl; + } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - 对进程进行采样 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(SAMPLING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // 获取数组的一个元素。 - PmuData *d = &data[i]; - // 获取调用栈对象,它是一个链表。 - Stack *stack = d->stack; - while (stack) { - // 获取符号对象。 - if (stack->symbol) { - ... 
+#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; + attr.callStack = 1; + attr.freq = 200; + attr.useFreq = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(SAMPLING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // 获取数组的一个元素。 + PmuData *d = &data[i]; + // 获取调用栈对象,它是一个链表。 + Stack *stack = d->stack; + while (stack) { + // 获取符号对象。 + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + + } + stack = stack->next; } - stack = stack->next; } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - Python 例子 @@ -266,6 +290,13 @@ python example.py * **针对Go示例代码:** 可以直接跳转到 go/src/libkperf_test目录下 ```shell +export GO111MODULE=off +export LD_LIBRARY_PATH=../libkperf/lib:$LD_LIBRARY_PATH go test -v # 全部运行 go test -v -test.run TestCount #指定运行的用例 +``` + +* **GO静态模式编译:** +```shell +go build -tags="static" ``` \ No newline at end of file diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index f8efa2988121cf82e7008cd1b249787e7ebf8a77..d2bfa43873749a5fd93eaa866793ee7467c62dc8 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -25,6 +25,7 @@ int pd = PmuOpen(COUNTING, &attr); # python代码示例 import time import kperf + evtList = 
["cycles", "branch-misses"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -134,15 +135,38 @@ perf record -e cycles,branch-misses 设置PmuAttr的方式和Counting一样,在调用PmuOpen的时候,把任务类型设置为SAMPLING,并且设置采样频率: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +PmuAttr attr = {0}; +char* evtList[1] = {"cycles"}; attr.freq = 1000; // 采样频率是1000HZ attr.useFreq = 1; +attr.evtList = evtList; +attr.numEvt = 1; int pd = PmuOpen(SAMPLING, &attr); +if ( pd == -1) { + printf("kperf pmuopen sample failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("cpu=%d pid=%d tid=%d period=%lu\n", data[i].cpu, data[i].pid, data[i].tid, data[i].period); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym import time + evtList = ["branch-misses", "cycles"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -150,6 +174,16 @@ pmu_attr = kperf.PmuAttr( symbolMode=kperf.SymbolMode.RESOLVE_ELF ) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if pd == -1: + print(f"kperf pmuopen sample failed, expect err is nil, but is {kperf.error()}\n") +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"cpu {item.cpu} pid {item.pid} tid {item.tid} period {item.period}") +kperf.close(pd) ``` ```go @@ -165,6 +199,18 @@ func main() { fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("cpu=%d pid=%d tid=%d period=%v\n", o.Cpu, o.Pid, o.Tid, o.Period) + } + kperf.PmuClose(pd) } ``` @@ -197,23 +243,55 @@ perf record 
-e arm_spe_0/load_filter=1/ 对于libkperf,可以这样设置PmuAttr: ```c++ // c++代码示例 +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + PmuAttr attr = {0}; attr.period = 8192; // 采样周期是8192 attr.dataFilter = LOAD_FILTER; // 设置filter属性为load_filter + +int pd = PmuOpen(SPE_SAMPLING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto o = data[i]; + printf("spe base info comm=%s, pid=%d, tid=%d, coreId=%d, numaId=%d, sockedId=%d\n", o.comm, o.pid, o.tid, o.cpuTopo->coreId, o.cpuTopo->numaId, o.cpuTopo->socketId); + printf("spe ext info pa=%lu, va=%lu, event=%lu, latency=%lu\n", o.ext->pa, o.ext->va, o.ext->event, o.ext->lat); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym +import time + pmu_attr = kperf.PmuAttr( - sampleRate = 1000, - symbolMode = kperf.SymbolMode.RESOLVE_ELF, - dataFilter = kperf.SpeFilter.SPE_DATA_ALL, - evFilter = kperf.SpeEventFilter.SPE_EVENT_RETIRED, - minLatency = 0x40 + sampleRate = 8192, + dataFilter = kperf.SpeFilter.LOAD_FILTER, ) # 需要root权限才能运行 pd = kperf.open(kperf.PmuTaskType.SPE_SAMPLING, pmu_attr) + +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"spe base info comm={item.comm}, pid={item.pid}, tid={item.tid}, coreId={item.cpuTopo.coreId}, numaId={item.cpuTopo.numaId}, sockedId={item.cpuTopo.socketId}") + print(f"spe ext info pa={item.ext.pa}, va={item.ext.va}, event={item.ext.event}, latency={item.ext.lat}\n") +kperf.close(pd) ``` ```go @@ -222,12 +300,28 @@ import "libkperf/kperf" import "time" func main() { - attr := kperf.PmuAttr{MinLatency:0x40, SymbolMode: kperf.ELF, SampleRate: 1000, DataFilter: kperf.SPE_DATA_ALL, EvFilter: kperf.SPE_EVENT_RETIRED} + attr := kperf.PmuAttr{SampleRate:8192, DataFilter: kperf.LOAD_FILTER} pd, err 
:= kperf.PmuOpen(kperf.SPE, attr) if err != nil { fmt.Printf("kperf pmuopen spe failed, expect err is nil, but is %v\n", err) return } + + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + } + + for _, o := range dataVo.GoData { + fmt.Printf("spe base info comm=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, socketId=%v\n", o.Comm, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) + fmt.Printf("spe ext info pa=%v, va=%v, event=%v, latency=%v\n", o.SpeExt.Pa, o.SpeExt.Va, o.SpeExt.Event, o.SpeExt.Lat) + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(pd) } ``` @@ -311,13 +405,35 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("evt=%s, count=%lu\n", data[i].evt, data[i].count); +} +PmuClose(pd); ``` + ```python # python代码示例 import kperf +import time + evtList = ["hisi_sccl1_ddrc0/flux_rd/"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"evt={item.evt} count={item.count}") +kperf.close(pd) ``` ```go @@ -334,6 +450,18 @@ func main() { fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("evt=%v count=%v \n", o.Evt, o.Count) + } + kperf.PmuClose(pd) } ``` @@ 
-350,7 +478,7 @@ evtList[0] = "hisi_sccl1_ddrc/flux_rd/"; evtList = ["hisi_sccl1_ddrc/flux_rd/"] ``` -```go +```go // go代码示例 evtList := []string{"hisi_sccl1_ddrc/flux_rd/"} ``` @@ -380,16 +508,27 @@ libkperf支持tracepoint的采集,支持的tracepoint事件可以通过perf li 可以这样设置PmuAttr: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + char *evtList[1]; evtList[0] = "sched:sched_switch"; PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; +attr.period = 1000; int pd = PmuOpen(SAMPLING, &attr); ``` ```python # python代码示例 +import kperf +import ksym +import time +from ctypes import * + evtList = ["sched:sched_switch"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -404,7 +543,6 @@ pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) import "libkperf/kperf" import "fmt" - func main() { evtList := []string{"sched:sched_switch"} attr := kperf.PmuAttr{EvtList:evtList, SymbolMode:kperf.ELF, SampleRate: 1000} @@ -424,10 +562,24 @@ tracepoint能够获取每个事件特有的数据,比如sched:sched_switch包 libkperf提供了接口PmuGetField来获取tracepoint的数据。比如对于sched:sched_switch,可以这样调用: ```c++ // c++代码示例 -int prev_pid; -PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); -char next_comm[16]; -PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto pmuData = &data[i]; + int prev_pid; + PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); + char next_comm[16]; + PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); + printf("next_comm=%s;prev_pid=%d\n", next_comm, prev_pid); +} ``` ```python @@ -480,10 +632,10 @@ func main() { var cArray [15]C.char nextErr := v.GetField("next_comm", unsafe.Pointer(&cArray)) if nextErr != nil { - fmt.Printf("get 
next_comm failed err is%v\n",nextErr) } else { ptr := (*C.char)(unsafe.Pointer(&cArray[0])) - fmt.Printf("next_comm=%v;", C.GoString(ptr)) + fmt.Printf("next_comm=%v\n", C.GoString(ptr)) } prevPid := C.int(0) @@ -505,36 +657,61 @@ libkperf提供了事件分组的能力,能够让多个事件同时处于采集 perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired ``` -对于libkperf,可以通过设置PmuAttr的evtAttr字段来设定哪些事件放在一个group内。 -比如,可以这样调用: +如果对多个相关联的事件采集,可以把关联的事件放到一个事件组。比如,计算bad speculation需要用到事件inst_retired,inst_spec和cycles,计算retiring需要用到事件inst_retired和cycles。那么perf应该这样使用: +``` +perf stat -e "{inst_retired,inst_spec,cycles}","{inst_spec,cycles}" +``` +用libkperf可以这样实现: ```c++ -// c++代码示例 -unsigned numEvt = 5; -char *evtList[numEvt] = {"cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"}; -// 前四个事件是一个分组 -struct EvtAttr groupId[numEvt] = {1,1,1,1,-1}; +// 指定5个事件,因为inst_retired和cycles会重复出现在多个指标中,所以需要重复指定事件。 +char *evtList[5] = {"inst_retired", "inst_spec", "cycles", "inst_retired", "cycles"}; +// 指定事件分组编号,前三个事件为一组,后两个事件为一组。 +EvtAttr groupId[5] = {1,1,1,2,2}; PmuAttr attr = {0}; attr.evtList = evtList; -attr.numEvt = numEvt; +attr.numEvt = 5; attr.evtAttr = groupId; +int pd = PmuOpen(COUNTING, &attr); +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData *data = nullptr; +int len = PmuRead(pd, &data); +// 根据分组来聚合数据 +map> evtMap; +for (int i=0;i +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[2]; // DDR读带宽 devAttr[0].metric = PMU_DDR_READ_BW; @@ -627,10 +826,10 @@ for (int i = 0; i < len / 2; ++i) { // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } for 
(int i = len / 2; i < len; ++i) { - cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -639,6 +838,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) @@ -658,6 +860,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_READ_BW}, kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_WRITE_BW}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -705,6 +911,10 @@ libkperf提供了采集L3 cache平均时延的能力,用于分析访存型应 参考代码: ```c++ +#include +#include "symbol.h" +#include "pmu.h" + // c++代码示例 PmuDeviceAttr devAttr[1]; // L3平均时延 @@ -720,7 +930,7 @@ PmuDeviceData *devData = nullptr; auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); // devData的长度等于cluster个数 for (int i=0;i +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[1]; // 采集PCIE设备RX的读带宽 devAttr[0].metric = PMU_PCIE_RX_MRD_BW; @@ -802,6 +1023,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_PCIE_RX_MRD_BW, bdf="16:04.0") ] @@ -817,6 +1041,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_PCIE_RX_MRD_BW, Bdf: "16:04.0"}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -837,6 +1065,102 @@ kperf.PmuClose(fd) 
pcie bw(16:04.0): 124122412 Bytes/ns ``` +### 采集跨numa/跨socket访问HHA比例 +libkperf提供了采集跨numa/跨socket访问HHA的操作比例的能力,用于分析访存型应用的性能瓶颈,采集以numa为粒度。 + +参考代码: +```c++ +// c++代码示例 +#include <iostream> +#include "symbol.h" +#include "pmu.h" + +PmuDeviceAttr devAttr[2]; +// 采集跨numa访问HHA的操作比例 +devAttr[0].metric = PMU_HHA_CROSS_NUMA; +// 采集跨socket访问HHA的操作比例 +devAttr[1].metric = PMU_HHA_CROSS_SOCKET; +// 初始化采集任务 +int pd = PmuDeviceOpen(devAttr, 2); +// 开始采集 +PmuEnable(pd); +sleep(1); +PmuData *oriData = nullptr; +int oriLen = PmuRead(pd, &oriData); +PmuDeviceData *devData = nullptr; +auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); +// devData的长度等于设备numa的个数 +for (int i = 0; i < len / 2; ++i) { + std::cout << "HHA cross-numa operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count << "\n"; +} +for (int i = len / 2; i < len; ++i) { + std::cout << "HHA cross-socket operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count << "\n"; +} +DevDataFree(devData); +PmuDataFree(oriData); +PmuDisable(pd); +``` + +```python +# python代码示例 +import kperf +import time + +dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET) +] +pd = kperf.device_open(dev_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +ori_data = kperf.read(pd) +dev_data = kperf.get_device_metric(ori_data, dev_attr) +for data in dev_data.iter: + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA: + print(f"HHA cross-numa operations ratio (Numa: {data.numaId}): {data.count}") + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET: + print(f"HHA cross-socket operations ratio (Numa: {data.numaId}): {data.count}") +``` + +```go +// go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + +deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_NUMA}, kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_SOCKET}} +fd, _ := 
kperf.PmuDeviceOpen(deviceAttrs) +kperf.PmuEnable(fd) +time.Sleep(1 * time.Second) +kperf.PmuDisable(fd) +dataVo, _ := kperf.PmuRead(fd) +deivceDataVo, _ := kperf.PmuGetDevMetric(dataVo, deviceAttrs) +for _, v := range deivceDataVo.GoDeviceData { + if v.Metric == kperf.PMU_HHA_CROSS_NUMA { + fmt.Printf("HHA cross-numa operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } + if v.Metric == kperf.PMU_HHA_CROSS_SOCKET { + fmt.Printf("HHA cross-socket operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } +} +kperf.DevDataFree(deivceDataVo) +kperf.PmuDataFree(dataVo) +kperf.PmuClose(fd) +``` + +执行上述代码,输出的结果类似如下: +``` +HHA cross-numa operations ratio (Numa: 0): 0.438888 +HHA cross-numa operations ratio (Numa: 1): 0.0248052 +HHA cross-numa operations ratio (Numa: 2): 0.0277224 +HHA cross-numa operations ratio (Numa: 3): 0.181404 +HHA cross-socket operations ratio (Numa: 0): 0.999437 +HHA cross-socket operations ratio (Numa: 1): 0.0253748 +HHA cross-socket operations ratio (Numa: 2): 0.329864 +HHA cross-socket operations ratio (Numa: 3): 0.18956 +``` + ### 采集系统调用函数耗时信息 libkperf基于tracepoint事件采集能力,在原有能力的基础上,重新封装了一组相关的调用API,来提供采集系统调用函数耗时信息的能力,类似于perf trace命令 @@ -848,6 +1172,10 @@ perf trace -e read,write 比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + unsigned numFunc = 2; const char *funs1 = "read"; const char *funs2 = "write"; @@ -862,7 +1190,7 @@ PmuTraceDisable(pd); PmuTraceData *data = nullptr; int len = PmuTraceRead(pd, &data); for(int i = 0; i < len; ++i) { - printf("funcName: %s, elspsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm) + printf("funcName: %s, elapsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s\n", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm); } PmuTraceClose(pd); ``` @@ -871,6 +1199,7 @@ PmuTraceClose(pd); # python代码示例 import kperf import time + funcList = ["read","write"] 
pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcList) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -930,6 +1259,10 @@ funcName: write elapsedTime: 0.00118 ms pid: 997235 tid: 997235 cpu: 110 comm: t ### 采集BRBE数据 libkperf基于sampling的能力,增加了对branch sample stack数据的采集能力,用于获取CPU的跳转记录, 通过branchSampleFilter可指定获取不同类型的分支跳转记录。 ```c++ +#include +#include "symbol.h" +#include "pmu.h" + char* evtList[1] = {"cycles"}; int* cpuList = nullptr; PmuAttr attr = {0}; @@ -980,6 +1313,7 @@ ffff88f60aa0->ffff88f60618 1 ```python import time +import ksym import kperf evtList = ["cycles"] diff --git a/docs/Go_API.md b/docs/Go_API.md index fd0a34685a6e670092dc331524ecd3c962400f7e..33d7a7cfaa71eba13f8bff14b7599319086aa6d5 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -300,7 +300,7 @@ import "fmt" func main() { syscallList := kperf.PmuSysCallFuncList() if syscallList == nil { - fmt.Printf("sys call list is empty") + fmt.Printf("sys call list is empty\n") } else { for _, funcName := range syscallList { fmt.Printf("func name %v\n", funcName) @@ -346,6 +346,8 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * Bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int和error,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 @@ -478,12 +480,12 @@ import "fmt" func main() { err := kperf.PmuOpenCpuFreqSampling(100) if err != nil { - fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v\n", err) } freqList := kperf.PmuReadCpuFreqDetail() for _, v := range freqList { - fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + 
fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d\n", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) } kperf.PmuCloseCpuFreqSampling() @@ -501,7 +503,7 @@ func main() { attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) if err != nil { - fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } @@ -511,24 +513,24 @@ func main() { dataVo, err := kperf.PmuRead(fd) if err != nil { - fmt.Printf("kperf pmuread failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) return } for _, o := range dataVo.GoData { if len(o.Symbols) != 0 { - fmt.Printf("expect symbol data is empty, but is not") + fmt.Printf("expect symbol data is empty, but is not\n") } } parseErr := kperf.ResolvePmuDataSymbol(dataVo) if parseErr != nil { - fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v\n", parseErr) } for _, o := range dataVo.GoData { if len(o.Symbols) == 0 { - fmt.Printf("expect symbol data is not empty, but is empty") + fmt.Printf("expect symbol data is not empty, but is empty\n") } } kperf.PmuDataFree(dataVo) diff --git a/docs/Python_API.md b/docs/Python_API.md index a0a196876d05dcb50d7d867829db8ffd902293ff..df07e7c94e9c583c26b979fbccac419928f15449 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -82,8 +82,10 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) ```python # python代码示例 -import time import kperf +import ksym +import time + evtList = ["cycles", "branch-misses"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -196,6 +198,7 @@ get_field(pmu_data: ImplPmuData, field_name: str, value: c_void_p) ```python import kperf 
+import ksym import time from ctypes import * @@ -272,8 +275,9 @@ kperf.trace_open(trace_type: kperf.PmuTraceType, pmu_trace_attr: kperf.PmuTraceA ```python # python代码示例 -import time import kperf +import time + funcs = ["read", "write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcs) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -335,17 +339,21 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int类型,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 ```python # python代码示例 +import kperf +import time dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) ] @@ -384,8 +392,8 @@ kperf.get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) 对 kperf.device_bdf_list(bdf_type: PmuBdfType): 查找当前系统pcie指标中有效的bdf列表和smmu指标中的有效bdf列表 -* calss PmuBdfType: - PMU_BDF_TYPE_PCIE: pice指标类型 +* class PmuBdfType: + PMU_BDF_TYPE_PCIE: pcie指标类型 PMU_BDF_TYPE_SMMU: smmu指标类型 * 返回数据iterator[str],可通过for循环遍历该单元 以下是kperf.device_bdf_list示例 @@ -443,6 +451,9 @@ def close_cpu_freq_sampling() 关闭cpu频率采集 def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 ```python #python代码示例 +import kperf +import time + err = kperf.open_cpu_freq_sampling(100) if err != 0: print(f"error number: {kperf.errorno()} error message: {kperf.error()}") @@ -459,6 +470,9 @@ 
kperf.close_cpu_freq_sampling() def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 ```python #python代码示例 +import kperf +import time + event_name = "cycles" pmu_attr = kperf.PmuAttr( evtList=[event_name], diff --git a/go/CMakeLists.txt b/go/CMakeLists.txt index 05d797e797a979c7c82848135c51cac06df0a3b7..053cd7033fb9f0e4b58c5b9189e613252e308a67 100644 --- a/go/CMakeLists.txt +++ b/go/CMakeLists.txt @@ -2,11 +2,16 @@ project(go) set(TARGET_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/src/libkperf/include) set(TARGET_LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/src/libkperf/lib) +set(TARGET_STATIC_LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/src/libkperf/static_lib) file(MAKE_DIRECTORY ${TARGET_INCLUDE_DIR}) file(MAKE_DIRECTORY ${TARGET_LIB_DIR}) +file(MAKE_DIRECTORY ${TARGET_STATIC_LIB_DIR}) file(GLOB HEAD_FILES "${CMAKE_CURRENT_LIST_DIR}/../include/*.h" "${CMAKE_CURRENT_LIST_DIR}/../symbol/symbol.h") +file(GLOB STATIC_ELF_FILES ${THIRD_PARTY}/local/elfin-parser/libelf++.a ${THIRD_PARTY}/local/elfin-parser/libdwarf++.a) install(FILES ${HEAD_FILES} DESTINATION ${TARGET_INCLUDE_DIR}) -install(TARGETS sym kperf DESTINATION ${TARGET_LIB_DIR}) \ No newline at end of file +install(TARGETS sym kperf DESTINATION ${TARGET_LIB_DIR}) +install(TARGETS sym_static kperf_static DESTINATION ${TARGET_STATIC_LIB_DIR}) +install(FILES ${STATIC_ELF_FILES} DESTINATION ${TARGET_STATIC_LIB_DIR}) \ No newline at end of file diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index b21f30460c6e99fe4f02d2517a0c9ac7e851c8cc..126edd971bb6350a181e0321ea0ca8fa8be39c4a 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -17,7 +17,8 @@ package kperf /* #cgo CFLAGS: -I ../include -#cgo LDFLAGS: -L ../lib -lkperf -lsym +#cgo !static LDFLAGS: -L ../lib -lkperf -lsym +#cgo static LDFLAGS: -L ../static_lib -lkperf -lsym -lelf++ -ldwarf++ -lstdc++ -lnuma #include "pmu.h" #include "symbol.h" @@ -245,12 +246,12 @@ var ( // 
PmuDeviceMetric var ( - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes/s PMU_DDR_READ_BW C.enum_PmuDeviceMetric = C.PMU_DDR_READ_BW - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes/s PMU_DDR_WRITE_BW C.enum_PmuDeviceMetric = C.PMU_DDR_WRITE_BW // Percore metric. @@ -265,8 +266,8 @@ var ( // Collect L3 total reference count, including miss and hit count. // Unit: count PMU_L3_REF C.enum_PmuDeviceMetric = C.PMU_L3_REF - // Pernuma metric. - // Collect L3 total latency for each numa node. + // Percluster metric. + // Collect L3 total latency for each cluster node. // Unit: cycles PMU_L3_LAT C.enum_PmuDeviceMetric = C.PMU_L3_LAT // Collect pcie rx bandwidth. @@ -284,6 +285,12 @@ var ( // Collect smmu address transaction. // Unit: count PMU_SMMU_TRAN C.enum_PmuDeviceMetric = C.PMU_SMMU_TRAN + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_NUMA + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_SOCKET ) // PmuBdfType @@ -348,6 +355,7 @@ type PmuData struct { Pid int // process id Tid int // thread id Cpu int // cpu id + GroupId int // id for group event Comm string // process command Period uint64 // sample period Count uint64 // event count. 
Only available for counting @@ -1178,6 +1186,7 @@ func transferCPmuDataToGoData(cPmuData *C.struct_PmuData, dataLen int, fd int) [ goDatas[i].Count = uint64(dataObj.count) goDatas[i].CountPercent = float64(dataObj.countPercent) goDatas[i].Cpu = int(dataObj.cpu) + goDatas[i].GroupId = int(dataObj.groupId) if dataObj.cpuTopo != nil { goDatas[i].CpuTopo = CpuTopology{CoreId: int(dataObj.cpuTopo.coreId), NumaId: int(dataObj.cpuTopo.numaId), SocketId: int(dataObj.cpuTopo.socketId)} } diff --git a/go/src/libkperf/sym/sym.go b/go/src/libkperf/sym/sym.go index 1d0787956c036dc5913739d0b0cd598cdfec51de..0e8326856e9ae7e1f08053b08d4bf2e125d98a61 100644 --- a/go/src/libkperf/sym/sym.go +++ b/go/src/libkperf/sym/sym.go @@ -17,7 +17,8 @@ package sym /* #cgo CFLAGS: -I ../include -#cgo LDFLAGS: -L ../lib -lsym +#cgo !static LDFLAGS: -L ../lib -lsym +#cgo static LDFLAGS: -L ../static_lib -lsym -lstdc++ -lelf++ -ldwarf++ #include "symbol.h" #include "pcerrc.h" diff --git a/include/pmu.h b/include/pmu.h index 7d4390c572fd35158adf426b88333fa24d576bca..2c7dd3b4df26656058e80cd313dac68820900627 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -93,7 +93,7 @@ enum BranchSampleFilter { }; struct EvtAttr { - int group_id; + int groupId; }; struct PmuAttr { @@ -246,6 +246,7 @@ struct PmuData { pid_t pid; // process id int tid; // thread id int cpu; // cpu id + int groupId; // id for group event struct CpuTopology *cpuTopo; // cpu topology const char *comm; // process command uint64_t period; // sample period @@ -419,12 +420,12 @@ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *valu struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName); enum PmuDeviceMetric { - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes PMU_DDR_READ_BW, - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. 
+ // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes PMU_DDR_WRITE_BW, // Percore metric. @@ -457,7 +458,13 @@ enum PmuDeviceMetric { // Perpcie metric. // Collect smmu address transaction. // Unit: count - PMU_SMMU_TRAN + PMU_SMMU_TRAN, + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA, + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET }; struct PmuDeviceAttr { diff --git a/pmu/dummy_event.cpp b/pmu/dummy_event.cpp index 723a753342d3ecdbf61a70eeca44cad37b866d6e..9137db89afa8c9af634139b94b5da5c2b357be97 100644 --- a/pmu/dummy_event.cpp +++ b/pmu/dummy_event.cpp @@ -76,6 +76,7 @@ namespace KUNPENG_PMU { if (forkPidQueue.empty()) { continue; } + std::lock_guard lg(dummyMutex); auto& pid = forkPidQueue.front(); for (const auto& evtList: evtLists) { auto groupId = evtList->GetGroupId(); @@ -83,7 +84,6 @@ namespace KUNPENG_PMU { DummyContext ctx = {evtList, static_cast(pid), evtGroupInfo.first, evtGroupInfo.second}; forkStrategy.DoHandler(ctx, evtGroupInfo.first, evtGroupInfo.second); } - std::lock_guard lg(dummyMutex); forkPidQueue.pop(); } }); @@ -146,7 +146,9 @@ namespace KUNPENG_PMU { if (header->type == PERF_RECORD_FORK) { auto sample = (KUNPENG_PMU::PerfRecordFork*) header; std::lock_guard lg(dummyMutex); - forkPidQueue.push(sample->tid); + if((uint8_t*)page + MAP_LEN > ringBuf + off + sizeof(KUNPENG_PMU::PerfRecordFork)) { + forkPidQueue.push(sample->tid); + } } if (header->type == PERF_RECORD_EXIT) { auto sample = (KUNPENG_PMU::PerfRecordFork*) header; diff --git a/pmu/dummy_event.h b/pmu/dummy_event.h index 04686773d07c35f6f3015b980032163b596fe712..dc25970c37ea255644e1790d94a279d7c0313e16 100644 --- a/pmu/dummy_event.h +++ b/pmu/dummy_event.h @@ -65,7 +65,7 @@ namespace KUNPENG_PMU { std::thread dummyThread; std::thread consumeThread; - std::atomic dummyFlag; + volatile std::atomic dummyFlag; std::vector>& evtLists; 
std::vector ppids; diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index bee7fa8dc5e13b0c475e79007b580a6111771e5f..29bcf1d1c19705cb42df43ff209bac169f675564 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -91,7 +91,9 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrSetBranchSampleFilter(branchSampleFilter); int err = 0; if (groupEnable) { - err = perfEvt->Init(groupEnable, evtLeader->xyCounterArray[row][col]->GetFd(), resetOutPutFd); + // If evtLeader is nullptr, I am the leader. + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][col]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); } else { err = perfEvt->Init(groupEnable, -1, resetOutPutFd); } @@ -109,6 +111,11 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptr /proc/sys/kernel/perf_event_paranoid'"); + } + if (err == UNKNOWN_ERROR) { pcerr::SetCustomErr(err, std::string{strerror(errno)}); } @@ -163,7 +170,15 @@ void KUNPENG_PMU::EvtList::FillFields( { for (auto i = start; i < end; ++i) { data[i].cpuTopo = cpuTopo; - data[i].evt = this->pmuEvt->name.c_str(); + if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { + // For group events, PmuData are all read by event leader, + // and then some PmuData elements should be related to group members. + data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + } else { + // For no group events or group leader. 
+ data[i].evt = this->pmuEvt->name.c_str(); + } + data[i].groupId = this->groupId; if (data[i].comm == nullptr) { data[i].comm = procTopo->comm; } @@ -176,6 +191,9 @@ void KUNPENG_PMU::EvtList::FillFields( int KUNPENG_PMU::EvtList::Read(vector& data, std::vector& sampleIps, std::vector& extPool, std::vector& switchData) { + + std::unique_lock lg(mutex); + for (unsigned int row = 0; row < numCpu; row++) { for (unsigned int col = 0; col < numPid; col++) { int err = this->xyCounterArray[row][col]->BeginRead(); @@ -245,40 +263,57 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons return; } std::unique_lock lock(mutex); + this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); + bool hasInitErr = false; + std::map perfEvtMap; for (unsigned int row = 0; row < numCpu; row++) { - this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); - procMap[pid] = this->pidList.back(); PerfEvtPtr perfEvt = this->MapPmuAttr(this->cpuList[row]->coreId, this->pidList.back()->tid, this->pmuEvt.get()); if (perfEvt == nullptr) { - return; + hasInitErr = true; + break; } perfEvt->SetSymbolMode(symMode); perfEvt->SetBranchSampleFilter(branchSampleFilter); int err = 0; if (groupEnable) { int sz = this->pidList.size(); - err = perfEvt->Init(groupEnable, evtLeader->xyCounterArray[row][sz - 1]->GetFd(), -1); + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, -1); } else { err = perfEvt->Init(groupEnable, -1, -1); } if (err != SUCCESS) { - return; + hasInitErr = true; + break; } - fdList.insert(perfEvt->GetFd()); + perfEvtMap.emplace(row, perfEvt); + } + + if (!hasInitErr) { + procMap[pid] = this->pidList.back(); numPid++; - this->xyCounterArray[row].emplace_back(perfEvt); - /** - * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, - * so the new perfEvt must also be added to enable. 
If the current status is read, the status of all perfEvt - * may be disable. At this time No need to collect counts. - */ - if (evtStat == ENABLE || evtStat == START) { - perfEvt->Enable(); + for (unsigned int row = 0; row < numCpu; row++) { + auto perfEvt = perfEvtMap[row]; + fdList.insert(perfEvt->GetFd()); + this->xyCounterArray[row].emplace_back(perfEvt); + /** + * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, + * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt + * may be disable. At this time No need to collect counts. + */ + if (evtStat == ENABLE || evtStat == START) { + perfEvt->Enable(); + } + if (evtStat == READ && prevStat != DISABLE) { + perfEvt->Enable(); + } } - if (evtStat == READ && prevStat != DISABLE) { - perfEvt->Enable(); + } else { + for (const auto& evtPtr : perfEvtMap) { + close(evtPtr.second->GetFd()); } + this->pidList.erase(this->pidList.end() - 1); } } @@ -302,7 +337,7 @@ void KUNPENG_PMU::EvtList::ClearExitFd() int pid = it->get()->GetPid(); if (exitPidVet.find(pid) != exitPidVet.end()) { int fd = it->get()->GetFd(); - this->fdList.erase(fd); + this->fdList.erase(this->fdList.find(fd)); close(fd); it = perfVet.erase(it); continue; @@ -323,4 +358,9 @@ void KUNPENG_PMU::EvtList::ClearExitFd() procMap.erase(exitPid); numPid--; } +} + +void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +{ + this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); } \ No newline at end of file diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 109dba565737524836f466fe989e5007f47149b5..d064143d69d1ab874283d9af769ee2bfb2e96550 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -49,13 +49,14 @@ enum class UncoreState { OnlyOther = 0b01, }; +struct EventGroupInfo; class EvtList { public: using ProcPtr = std::shared_ptr; using CpuPtr = std::shared_ptr; EvtList(const SymbolMode &symbolMode, std::vector &cpuList, std::vector 
&pidList, - std::shared_ptr pmuEvt, const int group_id) - : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), group_id(group_id) + std::shared_ptr pmuEvt, const int groupId) + : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), groupId(groupId) { this->numCpu = this->cpuList.size(); this->numPid = this->pidList.size(); @@ -72,6 +73,8 @@ public: int Read(std::vector& pmuData, std::vector& sampleIps, std::vector& extPool, std::vector& switchData); + void SetGroupInfo(const EventGroupInfo &grpInfo); + void SetTimeStamp(const int64_t& timestamp) { this->ts = timestamp; @@ -99,7 +102,7 @@ public: int GetGroupId() const { - return group_id; + return groupId; } int GetBlockedSample() const @@ -121,7 +124,7 @@ private: std::vector pidList; std::vector unUsedPidList; std::shared_ptr pmuEvt; - int group_id; // event group id + int groupId; // event group id std::vector>> xyCounterArray; std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); unsigned int numCpu = 0; @@ -134,6 +137,8 @@ private: int prevStat; int evtStat; std::mutex mutex; + // Fixme: decouple group event with normal event, use different classes to implement Read and Init. 
+ std::unique_ptr groupInfo = nullptr; }; struct EventGroupInfo { diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index bf801b0637a7497eb49c7387d654cc565b51cf58..9b2db6dd24fbe3bf34ef878d0f51d0163b9feebd 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -30,8 +30,20 @@ #include "perf_counter.h" using namespace std; +using namespace pcerr; static constexpr int MAX_ATTR_SIZE = 120; + +struct GroupReadFormat { + __u64 nr; + __u64 timeEnabled; + __u64 timeRunning; + struct { + __u64 value; + __u64 id; + } values[]; +}; + /** * Read pmu counter and deal with pmu multiplexing * Right now we do not implement grouping logic, thus we ignore the @@ -40,18 +52,90 @@ static constexpr int MAX_ATTR_SIZE = 120; int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &swtichData) { - struct ReadFormat perfCountValue; - - /** - * If some how the file descriptor is less than 0, - * we make the count to be 0 and return - */ if (__glibc_unlikely(this->fd < 0)) { - this->count = 0; + this->accumCount.clear(); + return UNKNOWN_ERROR; + } + + if (groupStatus == GroupStatus::NO_GROUP) { + return ReadSingleEvent(data); + } else if (groupStatus == GroupStatus::GROUP_LEADER) { + return ReadGroupEvents(data); + } + + // Group members do not need to read counters, + // Group leader will read them all. 
+ return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +{ + ReadFormat perfCountValue; + int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); + if (len < 0) { + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + if (accumCount.empty()) { + accumCount.assign(1, 0); + } + + int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, + perfCountValue.timeRunning, accumCount[0], data); + if (err != SUCCESS) { + return err; + } + + this->enabled = perfCountValue.timeEnabled; + this->running = perfCountValue.timeRunning; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +{ + // Fixme: + // In current class, we do not know how many events in group. + // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id) + static const unsigned MAX_GROUP_EVENTS = 14; + unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS; + GroupReadFormat *perfCountValue = static_cast(malloc(readSize)); + if (perfCountValue == NULL) { + return COMMON_ERR_NOMEM; + } + int len = read(this->fd, perfCountValue, readSize); + if (len < 0) { + free(perfCountValue); + New(UNKNOWN_ERROR, strerror(errno)); return UNKNOWN_ERROR; } - read(this->fd, &perfCountValue, sizeof(perfCountValue)); - if (perfCountValue.value < count || perfCountValue.timeEnabled < enabled || perfCountValue.timeRunning < running) { + + if (accumCount.empty()) { + accumCount.assign(perfCountValue->nr, 0); + } + + for (int i = 0;i < accumCount.size(); ++i) { + auto err = CountValueToData(perfCountValue->values[i].value, + perfCountValue->timeEnabled, + perfCountValue->timeRunning, + accumCount[i], + data + ); + if (err != SUCCESS) { + free(perfCountValue); + return err; + } + } + + this->enabled = perfCountValue->timeEnabled; + this->running = perfCountValue->timeRunning; + free(perfCountValue); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::CountValueToData(const 
__u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, vector &data) +{ + if (value < accumCount || timeEnabled < enabled || timeRunning < running) { return LIBPERF_ERR_COUNT_OVERFLOW; } @@ -60,17 +144,14 @@ int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector(perfCountValue.timeEnabled - enabled) / static_cast(perfCountValue.timeRunning - running); - increCount = static_cast((perfCountValue.value - count)* percent); + percent = static_cast(timeEnabled - enabled) / static_cast(timeRunning - running); + increCount = static_cast((value - accumCount)* percent); } - - this->count = perfCountValue.value; - this->enabled = perfCountValue.timeEnabled; - this->running = perfCountValue.timeRunning; + accumCount = value; data.emplace_back(PmuData{0}); auto& current = data.back(); @@ -123,7 +204,13 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0, * the child events will not start counting until the group leader is enabled. 
*/ - attr.disabled = 0; + if (groupFd != -1) { + attr.disabled = 0; + groupStatus = GroupStatus::GROUP_MEMBER; + } else { + groupStatus = GroupStatus::GROUP_LEADER; + } + attr.read_format |= PERF_FORMAT_GROUP; this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } else { #ifdef IS_X86 @@ -136,7 +223,9 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou } else { this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } + groupStatus = GroupStatus::NO_GROUP; } + this->groupFd = groupFd; DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { @@ -150,12 +239,33 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou */ int KUNPENG_PMU::PerfCounter::Enable() { + if (groupFd != -1) { + // Only group leader should use ioctl to enable, disable or reset, + // otherwise each event in the group will be collected for different durations. 
+ return SUCCESS; + } int err = PerfEvt::Enable(); if (err != SUCCESS) { return err; } - this->count = 0; + this->accumCount.clear(); this->enabled = 0; this->running = 0; return SUCCESS; } + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index 31280c78e7ad15e260512cccbd0daf5067e81253..8937bdbf3210a198d81aab972a18d0c12ebdf5dc 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -40,12 +40,30 @@ namespace KUNPENG_PMU { std::vector &extPool, std::vector &swtichData) override; int MapPerfAttr(const bool groupEnable, const int groupFd) override; int Enable() override; + int Disable() override; + int Reset() override; private: - // Accumulated pmu count, time enabled and time running. - __u64 count = 0; - __u64 enabled = 0; - __u64 running = 0; + enum class GroupStatus + { + NO_GROUP, + GROUP_LEADER, + GROUP_MEMBER + }; + + int CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, std::vector &data); + int ReadSingleEvent(std::vector &data); + int ReadGroupEvents(std::vector &data); + + // Accumulated pmu count, time enabled and time running. + __u64 enabled = 0; + __u64 running = 0; + // For group events, is the accum counts of all members. + // For normal events, has only one element. 
+ std::vector<__u64> accumCount; + int groupFd = 0; + GroupStatus groupStatus = GroupStatus::NO_GROUP; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/pfm/pfm.cpp b/pmu/pfm/pfm.cpp index e516dd505933a739496378729db9f5ad5d03eab8..aae863ba2baa027fdec60ec3f816b3e8c7082b83 100644 --- a/pmu/pfm/pfm.cpp +++ b/pmu/pfm/pfm.cpp @@ -173,6 +173,7 @@ struct PmuEvt* PfmGetSpeEvent( evt->collectType = collectType; int type = GetSpeType(); if (type == -1) { + delete evt; return nullptr; } evt->type = static_cast(type); diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index 6ae72cccc080d161f02ca1efda6822a15f2d97aa..e73eb3043069c6a4d3c5fcde53e8b772f415cdd9 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -391,6 +391,7 @@ struct PmuEvt* GetUncoreEvent(const char* pmuName, int collectType) // Fill fields for uncore devices. auto err = FillUncoreFields(pmuName, pmuEvtPtr); if (err != SUCCESS) { + delete pmuEvtPtr; return nullptr; } return pmuEvtPtr; @@ -414,6 +415,7 @@ struct PmuEvt* GetUncoreRawEvent(const char* pmuName, int collectType) // Fill fields for uncore devices. auto err = FillUncoreFields(pmuName, pmuEvtPtr); if (err != SUCCESS) { + delete pmuEvtPtr; return nullptr; } return pmuEvtPtr; diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index cba1713aedba628746537d7b283d96a82248f257..30de9ef1bdfb717340b7ba300964c85bfec7a49e 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -270,12 +270,12 @@ static void CopyAttrData(PmuAttr* newAttr, PmuAttr* inputAttr, enum PmuTaskType newAttr->evtList = newEvtList; newAttr->numEvt = inputAttr->numEvt; - // If the event group ID is not enabled, set the group_id to -1. It indicates that the event is not grouped. + // If the event group ID is not enabled, set the groupId to -1. It indicates that the event is not grouped. if ((collectType == SAMPLING || collectType == COUNTING) && inputAttr->evtAttr == nullptr) { struct EvtAttr *evtAttr = new struct EvtAttr[newAttr->numEvt]; // handle event group id. 
-1 means that it doesn't run event group feature. for (int i = 0; i < newAttr->numEvt; ++i) { - evtAttr[i].group_id = -1; + evtAttr[i].groupId = -1; } newAttr->evtAttr = evtAttr; } @@ -289,13 +289,13 @@ static bool FreeEvtAttr(struct PmuAttr *attr) bool flag = false; int notGroupId = -1; for (int i = 0; i < attr->numEvt; ++i) { - if (attr->evtAttr[i].group_id != notGroupId ) { + if (attr->evtAttr[i].groupId != notGroupId ) { flag = true; break; } } - // when the values of group_id are all -1, the applied memory is released. + // when the values of groupId are all -1, the applied memory is released. if (!flag) { delete[] attr->evtAttr; attr->evtAttr = nullptr; @@ -427,6 +427,7 @@ static void PmuTaskAttrFree(PmuTaskAttr *taskAttr) int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr) { SetWarn(SUCCESS); + New(SUCCESS); PmuAttr copiedAttr = *attr; pair previousEventList = {0, nullptr}; try { @@ -830,25 +831,7 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } -static bool PerfEventSupported(__u64 type, __u64 config) -{ - perf_event_attr attr{}; - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(struct perf_event_attr); - attr.type = type; - attr.config = config; - attr.disabled = 1; - attr.inherit = 1; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); - if (fd < 0) { - return false; - } - close(fd); - return true; -} - -static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) +static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int groupId) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); /** @@ -879,25 +862,13 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att #endif return nullptr; } - - if (!PerfEventSupported(pmuEvt->type, pmuEvt->config)) { 
- int err = MapErrno(errno); - if (err == LIBPERF_ERR_NO_PERMISSION) { - New(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event.Swtich to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); - } else if(err == UNKNOWN_ERROR) { - New(UNKNOWN_ERROR, std::string{strerror(errno)}); - } else { - New(err); - } - return nullptr; - } } /** * Assign cpus to collect */ PrepareCpuList(attr, taskParam.get(), pmuEvt); - taskParam->group_id = group_id; + taskParam->groupId = groupId; taskParam->pmuEvt = shared_ptr(pmuEvt, PmuEvtFree); taskParam->pmuEvt->useFreq = attr->useFreq; @@ -919,7 +890,7 @@ struct PmuTaskAttr* AssignPmuTaskParam(enum PmuTaskType collectType, struct PmuA return taskParam; } for (int i = 0; i < attr->numEvt; i++) { - struct PmuTaskAttr* current = AssignTaskParam(collectType, attr, attr->evtList[i], attr->evtAttr[i].group_id); + struct PmuTaskAttr* current = AssignTaskParam(collectType, attr, attr->evtList[i], attr->evtAttr[i].groupId); if (current == nullptr) { return nullptr; } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index d0a47c4f39b9eac93ee4ca36042614c539336387..9e8feb0feb8fa9716f1f8ebef92274091d90bbc3 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -15,6 +15,7 @@ ******************************************************************************/ #include #include +#include #include #include "linked_list.h" #include "cpu_map.h" @@ -149,7 +150,7 @@ namespace KUNPENG_PMU { } fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); std::shared_ptr evtList = - std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->group_id); + std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); needBytesNum += PredictRequiredMemory(taskParam->pmuEvt->collectType, cpuTopoList.size(), procTopoList.size()); 
evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); InsertEvtList(pd, evtList); @@ -198,7 +199,7 @@ namespace KUNPENG_PMU { continue; } if (eventGroupInfoMap.find(evtList->GetGroupId()) == eventGroupInfoMap.end()) { - auto err = EvtInit(false, nullptr, pd, evtList, isMemoryEnough); + auto err = EvtInit(true, nullptr, pd, evtList, isMemoryEnough); if (err != SUCCESS) { return err; } @@ -232,6 +233,7 @@ namespace KUNPENG_PMU { return err; } } + evtGroup.second.evtLeader->SetGroupInfo(evtGroup.second); } groupMapPtr eventDataEvtGroup = std::make_shared>(eventGroupInfoMap); InsertDataEvtGroupList(pd, eventDataEvtGroup); @@ -294,7 +296,34 @@ namespace KUNPENG_PMU { } } - void HandleBlockData(std::vector& pmuData, std::vector& switchData) + + void SortTwoVector(std::vector& pmuData, std::vector& sampleIps) + { + std::vector indices(pmuData.size()); + std::iota(indices.begin(), indices.end(), 0); + std::stable_sort(indices.begin(), indices.end(), [&pmuData](size_t a, size_t b){ + if (pmuData[a].tid == pmuData[b].tid) { + return pmuData[a].ts < pmuData[b].ts; + } + return pmuData[a].tid < pmuData[b].tid; + }); + + std::vector sortedPmuData; + std::vector sortedSampleIps; + size_t size = pmuData.size(); + sortedPmuData.reserve(size); + sortedSampleIps.reserve(size); + + for (size_t i = 0; i < size; ++i) { + sortedPmuData.emplace_back(std::move(pmuData[indices[i]])); + sortedSampleIps.emplace_back(std::move(sampleIps[indices[i]])); + } + pmuData = std::move(sortedPmuData); + sampleIps = std::move(sortedSampleIps); + } + + void HandleBlockData(std::vector& pmuData, std::vector& sampleIps, + SymbolMode symMode,std::vector& switchData) { std::sort(switchData.begin(), switchData.end(), [](const PmuSwitchData& a, const PmuSwitchData& b) { if (a.tid == b.tid) { @@ -305,7 +334,7 @@ namespace KUNPENG_PMU { std::unordered_map> tidToOffTimeStamps; int64_t outTime = 0; int prevTid = -1; - for (const auto& item: switchData) { + for (const auto& item : switchData) { if 
(item.swOut) { outTime = item.ts; prevTid = item.tid; @@ -323,19 +352,13 @@ namespace KUNPENG_PMU { } } } - - std::sort(pmuData.begin(), pmuData.end(), [](const PmuData& a, const PmuData& b) { - if (a.tid == b.tid) { - return a.ts < b.ts; - } - return a.tid < b.tid; - }); + SortTwoVector(pmuData, sampleIps); int csCnt = 0; int64_t prevTs = 0; int64_t currentTs = 0; int64_t curPeriod = 0; int currentTid = -1; - for (auto& item: pmuData) { + for (auto& item : pmuData) { if (currentTid != item.tid) { currentTid = item.tid; csCnt = 0; @@ -347,7 +370,9 @@ namespace KUNPENG_PMU { if (strcmp(item.evt, "context-switches") == 0) { // Convert stack from 'schedule[kernel] -> futex_wait[kernel] -> ...[kernel] -> lock_wait -> start_thread' // to 'lock_wait -> start_thread', only keeping user stack. - TrimKernelStack(item); + if (symMode != NO_SYMBOL_RESOLVE) { + TrimKernelStack(item); + } // Before the context-switches event, there is only one cycles event, which we need to ignore. if (currentTs == 0) { currentTs = item.ts; @@ -444,6 +469,7 @@ namespace KUNPENG_PMU { void PmuList::Close(const int pd) { + EraseDummyEvent(pd); auto evtList = GetEvtList(pd); for (auto item: evtList) { item->Close(); @@ -455,7 +481,6 @@ namespace KUNPENG_PMU { EraseDataEvtGroupList(pd); RemoveEpollFd(pd); EraseSpeCpu(pd); - EraseDummyEvent(pd); EraseParentEventMap(pd); SymResolverDestroy(); PmuEventListFree(); @@ -667,7 +692,6 @@ namespace KUNPENG_PMU { } auto& eventData = userDataList[iPmuData]; - auto symMode = symModeList[eventData.pd]; for (size_t i = 0; i < eventData.data.size(); ++i) { auto& pmuData = eventData.data[i]; auto& ipsData = eventData.sampleIps[i]; @@ -675,6 +699,15 @@ namespace KUNPENG_PMU { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } + if (GetBlockedSampleState(eventData.pd) == 1) { + for (auto& item : eventData.data) { + if (strcmp(item.evt, "context-switches") == 0) { + // Convert stack from 'schedule[kernel] -> futex_wait[kernel] -> 
...[kernel] -> lock_wait -> start_thread' + // to 'lock_wait -> start_thread', only keeping user stack. + TrimKernelStack(item); + } + } + } New(SUCCESS); return SUCCESS; } @@ -760,7 +793,8 @@ namespace KUNPENG_PMU { } else { FillStackInfo(evData); if (GetBlockedSampleState(pd) == 1) { - HandleBlockData(evData.data, evData.switchData); + auto symMode = symModeList[evData.pd]; + HandleBlockData(evData.data, evData.sampleIps, symMode, evData.switchData); } auto inserted = userDataList.emplace(pData, move(evData)); dataList.erase(pd); diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 523e2ad96b18c34f34fa99665897aba9054ff56c..99539d297dd002069f14b4cf9f2cb8841a70406a 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -34,7 +34,7 @@ struct PmuTaskAttr { // list length has to be as the same as numPid std::shared_ptr pmuEvt; // which pmu to be collected - int group_id; // event group id + int groupId; // event group id struct PmuTaskAttr* next; // next task attribute }; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 6998c5f6fc7c88392da89d1cb9e4e8d0f6cb70ac..64c9908f88c29e55b3fb00ceaabda4d13d388e85 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -84,11 +84,13 @@ namespace KUNPENG_PMU { {PmuDeviceMetric::PMU_PCIE_RX_MWR_BW, "PMU_PCIE_RX_MWR_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MRD_BW, "PMU_PCIE_TX_MRD_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MWR_BW, "PMU_PCIE_TX_MWR_BW"}, - {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"} + {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"}, + {PmuDeviceMetric::PMU_HHA_CROSS_NUMA, "PMU_HHA_CROSS_NUMA"}, + {PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, "PMU_HHA_CROSS_SOCKET"}, }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_L3_LAT}; + set pernumaMetric = {PMU_HHA_CROSS_NUMA, PMU_HHA_CROSS_SOCKET}; set perClusterMetric = {PMU_L3_LAT}; set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, @@ -269,6 +271,30 @@ namespace KUNPENG_PMU { 2 } }; + 
+ PMU_METRIC_PAIR HHA_CROSS_NUMA = { + PmuDeviceMetric::PMU_HHA_CROSS_NUMA, + { + "hisi_sccl", + "hha", + {"0x0", "0x02"}, + "", + "", + 0 + } + }; + + PMU_METRIC_PAIR HHA_CROSS_SOCKET = { + PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, + { + "hisi_sccl", + "hha", + {"0x0", "0x01"}, + "", + "", + 0 + } + }; } static const map HIP_A_UNCORE_METRIC_MAP { @@ -278,6 +304,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::L3_MISS, METRIC_CONFIG::L3_REF, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; static const map HIP_B_UNCORE_METRIC_MAP { @@ -292,6 +320,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::PCIE_TX_MRD_BW, METRIC_CONFIG::PCIE_TX_MWR_BW, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; static const map L3_CLOCK_NS {{CHIP_TYPE::HIPB, 0.3448275862}}; @@ -854,7 +884,7 @@ namespace KUNPENG_PMU { } // remove duplicate device attribute - static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr, bool l3ReDup) + static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr) { std::unordered_set uniqueSet; for (int i = 0; i < len; ++i) { @@ -866,17 +896,6 @@ namespace KUNPENG_PMU { } if (uniqueSet.find(key) == uniqueSet.end()) { - // when in deviceopen remove the same PMU_L3_TRAFFIC and PMU_L3_REF, - // but when getDevMetric we need to keep them. 
- if (l3ReDup == true && - (attr[i].metric == PmuDeviceMetric::PMU_L3_TRAFFIC || attr[i].metric == PmuDeviceMetric::PMU_L3_REF)) { - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_TRAFFIC)) != uniqueSet.end()) { - continue; - } - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_REF)) != uniqueSet.end()) { - continue; - } - } uniqueSet.insert(key); deviceAttr.emplace_back(attr[i]); } @@ -970,6 +989,9 @@ namespace KUNPENG_PMU { case PMU_PCIE_TX_MWR_BW: case PMU_SMMU_TRAN: return PMU_METRIC_BDF; + case PMU_HHA_CROSS_NUMA: + case PMU_HHA_CROSS_SOCKET: + return PMU_METRIC_NUMA; } return PMU_METRIC_INVALID; } @@ -990,25 +1012,57 @@ namespace KUNPENG_PMU { int AggregateByNuma(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { - map devDataByNuma; + const auto& deviceConfig = GetDeviceMtricConfig(); + const auto& findConfig = deviceConfig.find(metric); + if (findConfig == deviceConfig.end()) { + return SUCCESS; + } + auto &evts = findConfig->second.events; + if (evts.size() != 2) { + return SUCCESS; + } + // Event name for total access count. + string totalEvt = evts[0]; + // Event name for cross-numa/cross-socket count. + string crossEvt = evts[1]; + // Sort data by numa, and then sort by event string. 
+ map> devDataByNuma; for (auto &data : rawData) { + string devName; + string evtName; + if (!GetDeviceName(data.evtName, devName, evtName)) { + continue; + } + auto evtConfig = ExtractEvtStr("config", evtName); auto findData = devDataByNuma.find(data.numaId); if (findData == devDataByNuma.end()) { - PmuDeviceData outData; - outData.metric = data.metric; - outData.count = data.count; - outData.mode = GetMetricMode(data.metric); - outData.numaId = data.numaId; - devDataByNuma[data.numaId] = outData; + devDataByNuma[data.numaId][evtConfig] = data; } else { - findData->second.count += data.count; + devDataByNuma[data.numaId][evtConfig].count += data.count; } } for (auto &data : devDataByNuma) { - devData.push_back(data.second); + // Get events of cross-numa/cross-socket access count and total access count. + auto findcrossData = data.second.find(crossEvt); + auto findtotalData = data.second.find(totalEvt); + if (findcrossData == data.second.end() || findtotalData == data.second.end()) { + continue; + } + // Compute ratio: cross access count / total access count + double ratio = 0.0; + if (findtotalData->second.count != 0) { + ratio = (double)(findcrossData->second.count) / findtotalData->second.count; + } else { + ratio = -1; + } + PmuDeviceData outData; + outData.metric = metric; + outData.count = ratio; + outData.mode = GetMetricMode(metric); + outData.numaId = data.first; + devData.push_back(outData); } - return SUCCESS; } @@ -1113,7 +1167,7 @@ namespace KUNPENG_PMU { return ddrcIndex; } - static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + static bool getChannelId(const char *evt, unsigned &channelId) { string devName; string evtName; @@ -1155,7 +1209,7 @@ namespace KUNPENG_PMU { unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: socketId, channelId, ddrNumaId for (auto &data : rawData) { unsigned channelId; - if (!getChannelId(data.evtName, data.ddrNumaId, channelId)) { + if (!getChannelId(data.evtName, 
channelId)) { continue; } auto ddrDatakey = make_tuple(data.socketId, channelId, data.ddrNumaId); @@ -1278,6 +1332,8 @@ namespace KUNPENG_PMU { {PMU_PCIE_TX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_TX_MWR_BW, PcieBWAggregate}, {PMU_SMMU_TRAN, SmmuTransAggregate}, + {PMU_HHA_CROSS_NUMA, AggregateByNuma}, + {PMU_HHA_CROSS_SOCKET, AggregateByNuma}, }; static bool IsMetricEvent(const string &devName, const string &evtName, const PmuDeviceAttr &devAttr) @@ -1380,7 +1436,7 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } - if (perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + if (perChannelMetric.find(devAttr.metric) != perChannelMetric.end()) { devData.ddrNumaId = pmuData[i].cpuTopo->numaId; devData.socketId = pmuData[i].cpuTopo->socketId; } @@ -1468,7 +1524,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) } // Remove duplicate device attributes. vector deviceAttr; - if (RemoveDupDeviceAttr(attr, len, deviceAttr, true) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, len, deviceAttr) != SUCCESS) { return -1; } vector configEvtList; @@ -1480,8 +1536,17 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) configEvtList.insert(configEvtList.end(), temp.begin(), temp.end()); } - vector evts; + //remove the same event of PMU_L3_TRAFFIC and PMU_L3_REF, PMU_HHA_CROSS_NUMA and PMU_HHA_CROSS_SOCKET + unordered_set tmpEvents; + vector filteredEvtList; for (auto& evt : configEvtList) { + if (tmpEvents.find(evt) == tmpEvents.end()) { + tmpEvents.insert(evt); + filteredEvtList.push_back(evt); + } + } + vector evts; + for (auto& evt : filteredEvtList) { evts.push_back(const_cast(evt.c_str())); } @@ -1533,7 +1598,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, } // Remove duplicate device attributes. 
vector deviceAttr; - if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr, false) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr) != SUCCESS) { return -1; } // Filter pmuData by metric and generate InnerDeviceData, @@ -1671,21 +1736,21 @@ int PmuGetNumaCore(unsigned nodeId, unsigned **coreList) ifstream in(nodeListFile); if (!in.is_open()) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } std::string cpulist; in >> cpulist; auto split = SplitStringByDelimiter(cpulist, '-'); if (split.size() != 2) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } auto start = stoi(split[0]); auto end = stoi(split[1]); int coreNums = end - start + 1; if (coreNums <= 0) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } InitializeCoreArray(); *coreList = &coreArray[start]; diff --git a/pmu/sample_process.cpp b/pmu/sample_process.cpp index 778b62222b2a3cc5654d0034f216a2fd34bd8b35..5f30eb6f0310c2d3c86386ad99996d446baa5ce2 100644 --- a/pmu/sample_process.cpp +++ b/pmu/sample_process.cpp @@ -22,33 +22,6 @@ #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) #define MB() asm volatile("dmb ish" ::: "memory") static constexpr int MAX_DATA_SIZE = 8192; -#ifdef IS_X86 -#define PerfRingbufferSmpStoreRelease(p, v) \ - ({ \ - union { \ - typeof(*p) val; \ - char charHead[1]; \ - } pointerUnion = {.val = (v)}; \ - asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ - }) -#else -#define PerfRingbufferSmpStoreRelease(p, v) \ - ({ \ - union { \ - typeof(*p) val; \ - char charHead[1]; \ - } pointerUnion = {.val = (v)}; \ - asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ - }) -#endif - -void KUNPENG_PMU::PerfMmapConsume(PerfMmap &map) -{ - - __u64 prev = map.prev; - struct perf_event_mmap_page *base = (struct perf_event_mmap_page *)map.base; - 
PerfRingbufferSmpStoreRelease(&base->data_tail, prev); -} void KUNPENG_PMU::PerfMmapReadDone(PerfMmap &map) { diff --git a/pmu/sample_process.h b/pmu/sample_process.h index 8ad940755b93ec827f80d50a8a13f329fcb88bbf..f012335b76b1dbbd3ce08c8f045810ac8bffd1df 100644 --- a/pmu/sample_process.h +++ b/pmu/sample_process.h @@ -17,13 +17,39 @@ #define PMU_SAMPLE_PROCESS_H #include #include "pmu_event.h" +#include "common.h" + +#ifdef IS_X86 +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#else +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#endif namespace KUNPENG_PMU { int MmapInit(PerfMmap& sampleMmap); union PerfEvent* ReadEvent(PerfMmap& map); int RingbufferReadInit(PerfMmap& map); - void PerfMmapConsume(PerfMmap& map); + inline void PerfMmapConsume(PerfMmap& map) + { + __u64 prev = map.prev; + struct perf_event_mmap_page *base = (struct perf_event_mmap_page *)map.base; + PerfRingbufferSmpStoreRelease(&base->data_tail, prev); + } void PerfMmapReadDone(PerfMmap& map); } // namespace KUNPENG_PMU diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index cab155621d5ddf0b295aad7042d2a558b70c0ce8..cb78a80f1e896f4d64f972ddf0b08dd0f9834f57 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -129,7 +129,7 @@ int KUNPENG_PMU::PerfSampler::Close() return SUCCESS; } -void KUNPENG_PMU::PerfSampler::UpdatePidInfo(const pid_t &pid, const int &tid) +void KUNPENG_PMU::PerfSampler::UpdatePidInfo(const int &tid) { auto findProc = procMap.find(tid); if (findProc == procMap.end()) { @@ -149,6 +149,9 @@ void KUNPENG_PMU::PerfSampler::UpdateCommInfo(KUNPENG_PMU::PerfEvent *event) procTopo->tid = event->comm.tid; 
procTopo->pid = event->comm.pid; procTopo->comm = static_cast(malloc(strlen(event->comm.comm) + 1)); + if (procTopo->comm == nullptr) { + return; + } strcpy(procTopo->comm, event->comm.comm); DBG_PRINT("Add to proc map: %d\n", event->comm.tid); procMap[event->comm.tid] = procTopo; @@ -217,6 +220,7 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; + ips->ips.reserve(ips->ips.size() + sample->nr); // Copy ips from ring buffer and get stack info later. if (evt->callStack == 0) { int i = 0; @@ -224,12 +228,13 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( i++; } if (i < sample->nr) { - ips->ips.push_back(sample->ips[i]); + ips->ips.emplace_back(sample->ips[i]); } } else { for (int i = sample->nr - 1; i >= 0; --i) { - if (IsValidIp(sample->ips[i])) { - ips->ips.push_back(sample->ips[i]); + const auto& ip = sample->ips[i]; + if (IsValidIp(ip)) { + ips->ips.emplace_back(ip); } } } @@ -264,7 +269,7 @@ void KUNPENG_PMU::PerfSampler::ReadRingBuffer(vector &data, vectormmap.tid, event->mmap.filename, event->mmap.addr); } else if (symMode == RESOLVE_ELF) { SymResolverUpdateModuleNoDwarf(event->mmap.tid, event->mmap.filename, event->mmap.addr); @@ -272,7 +277,7 @@ void KUNPENG_PMU::PerfSampler::ReadRingBuffer(vector &data, vectormmap2.tid, event->mmap2.filename, event->mmap2.addr); } else if (symMode == RESOLVE_ELF) { SymResolverUpdateModuleNoDwarf(event->mmap2.tid, event->mmap2.filename, event->mmap2.addr); @@ -281,7 +286,7 @@ void KUNPENG_PMU::PerfSampler::ReadRingBuffer(vector &data, vectorfork.pid, event->fork.tid); - UpdatePidInfo(event->fork.pid, event->fork.tid); + UpdatePidInfo(event->fork.tid); break; } case PERF_RECORD_COMM: { @@ -308,7 +313,7 @@ void KUNPENG_PMU::PerfSampler::FillComm(const size_t &start, const size_t &end, auto& pmuData = data[i]; auto findProc = procMap.find(pmuData.tid); if (findProc == procMap.end()) { - UpdatePidInfo(pmuData.pid, 
pmuData.tid); + UpdatePidInfo(pmuData.tid); findProc = procMap.find(pmuData.tid); if (findProc == procMap.end()) { continue; diff --git a/pmu/sampler.h b/pmu/sampler.h index 41fcdff17aa7b90cb1c911ce56e04950bbb4f2c2..6c49a74d6e594aec3cb18bd3087743c087e9b088 100644 --- a/pmu/sampler.h +++ b/pmu/sampler.h @@ -59,7 +59,7 @@ namespace KUNPENG_PMU { void ReadRingBuffer(std::vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &switchData); void FillComm(const size_t &start, const size_t &end, std::vector &data); - void UpdatePidInfo(const pid_t &pid, const int &tid); + void UpdatePidInfo(const int &tid); void UpdateCommInfo(KUNPENG_PMU::PerfEvent *event); void ParseSwitch(KUNPENG_PMU::PerfEvent *event, struct PmuSwitchData *switchCurData); void ParseBranchSampleData(struct PmuData *pmuData, PerfRawSample *sample, union PerfEvent *event, std::vector &extPool); diff --git a/pmu/spe.cpp b/pmu/spe.cpp index 2bc42c400bb7ba277a6da589d52262e2d5f06ea8..a8cc7d9570c85c5c9ce719f0c316f2a775582b11 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -132,14 +132,17 @@ static void CoreSpeClose(struct SpeCoreContext *ctx, struct SpeContext *speCtx) { if (ctx->speMpage && ctx->speMpage != MAP_FAILED) { munmap(ctx->speMpage, speCtx->speMmapSize); + ctx->speMpage = nullptr; } if (ctx->auxMpage && ctx->auxMpage != MAP_FAILED) { munmap(ctx->auxMpage, speCtx->auxMmapSize); + ctx->auxMpage = nullptr; } if (ctx->dummyMpage && ctx->dummyMpage != MAP_FAILED) { munmap(ctx->dummyMpage, speCtx->dummyMmapSize); + ctx->dummyMpage = nullptr; } if (ctx->speFd > 0) { @@ -211,6 +214,7 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) if (attr->type == -1) { free(ctx); + ctx = nullptr; return LIBPERF_ERR_SPE_UNAVAIL; } @@ -225,6 +229,7 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) ctx->coreCtxes = (struct SpeCoreContext *)malloc(sizeof(struct SpeCoreContext)); if (!ctx->coreCtxes) { free(ctx); + ctx = nullptr; return COMMON_ERR_NOMEM; } ctx->coreCtxes->mask = ctx->auxMmapSize - 
1; @@ -233,7 +238,9 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) auto err = CoreSpeOpen(&ctx->coreCtxes, ctx, attr, cpu); if (err != 0) { free(ctx->coreCtxes); + ctx->coreCtxes = nullptr; free(ctx); + ctx = nullptr; return err; } return SUCCESS; @@ -304,7 +311,9 @@ void SpeClose(struct SpeContext *ctx) } free(ctx->coreCtxes); + ctx->coreCtxes = nullptr; free(ctx); + ctx = nullptr; return; } @@ -478,7 +487,7 @@ static struct SpeRecord *CoreAuxData(struct SpeCoreContext *ctx, AuxContext *aux return bufEnd; } -static size_t ComputeAuxSize(size_t auxMapLen, size_t headOff, size_t oldOff, int pageSize) +static size_t ComputeAuxSize(size_t auxMapLen, size_t headOff, size_t oldOff) { // Compute current aux buffer size by current offset and previous offset. size_t size = 0; @@ -493,7 +502,7 @@ static size_t ComputeAuxSize(size_t auxMapLen, size_t headOff, size_t oldOff, in } static struct SpeRecord *CoreSpeData(struct SpeCoreContext *ctx, struct ContextSwitchData *dummyData, - struct SpeRecord *buf, int *remainSize, int pageSize, int cpu) + struct SpeRecord *buf, int *remainSize, int cpu) { int dummyIdx = 1; struct perf_event_mmap_page *mpage = (struct perf_event_mmap_page *)ctx->speMpage; @@ -505,7 +514,7 @@ static struct SpeRecord *CoreSpeData(struct SpeCoreContext *ctx, struct ContextS } size_t headOff = head & ctx->mask; size_t oldOff = old & ctx->mask; - size_t size = ComputeAuxSize(mpage->aux_size, headOff, oldOff, pageSize); + size_t size = ComputeAuxSize(mpage->aux_size, headOff, oldOff); size_t auxOffset = 0; struct SpeRecord *bufEnd = nullptr; @@ -545,7 +554,7 @@ int Spe::SpeReadData(struct SpeContext *context, struct SpeRecord *buf, int size int remainSize = size; int dummySize = context->dummyMmapSize; CoreDummyData(context->coreCtxes, dummyData, dummySize, context->pageSize); - CoreSpeData(context->coreCtxes, dummyData, buf, &remainSize, context->pageSize, cpu); + CoreSpeData(context->coreCtxes, dummyData, buf, &remainSize, cpu); return size - 
remainSize; } diff --git a/python/modules/_libkperf/Config.py b/python/modules/_libkperf/Config.py index aef5f1920bafd329be96b973e853a30f09db0ea0..19d545adc28a2c542b107f6172c3c41e907ff6f1 100644 --- a/python/modules/_libkperf/Config.py +++ b/python/modules/_libkperf/Config.py @@ -23,16 +23,16 @@ VERSION = '1.0' UTF_8 = 'utf-8' -def lib_path() -> str: +def lib_path(): return os.path.dirname(os.path.abspath(__file__)) -def libsym_path() -> str: +def libsym_path(): libsym = 'libsym.so' return os.path.join(lib_path(), libsym) -def libkperf_path() -> str: +def libkperf_path(): libkperf = 'libkperf.so' return os.path.join(lib_path(), libkperf) diff --git a/python/modules/_libkperf/Perror.py b/python/modules/_libkperf/Perror.py index b2e474d3cab5cca5e2cee8fbcdc9d7edfd2121c6..ba65041fdbe1fe717596d6560fdc1af5d4c33590 100644 --- a/python/modules/_libkperf/Perror.py +++ b/python/modules/_libkperf/Perror.py @@ -17,7 +17,7 @@ import ctypes from .Config import UTF_8, sym_so -def Perrorno() -> int: +def Perrorno(): """ int Perrorno(); """ @@ -28,7 +28,7 @@ def Perrorno() -> int: return c_Perrorno() -def Perror() -> str: +def Perror(): """ const char* Perror(); """ @@ -39,7 +39,7 @@ def Perror() -> str: return c_Perror().decode(UTF_8) -def GetWarn() -> int: +def GetWarn(): """ int GetWarn(); """ @@ -50,7 +50,7 @@ def GetWarn() -> int: return c_GetWarn() -def GetWarnMsg() -> str: +def GetWarnMsg(): """ const char* GetWarnMsg(); """ diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 2cffe19b5da9c202b6b127743cda904d09f3d855..8cead80ba3372eefc11193fd4c3d2aebe04aad35 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -28,35 +28,35 @@ class SampleRateUnion(ctypes.Union): class CtypesEvtAttr(ctypes.Structure): """ struct EvtAttr { - int group_id; + int groupId; }; """ - _fields_ = [('group_id', ctypes.c_int)] + _fields_ = [('groupId', ctypes.c_int)] - def __init__(self, group_id: int=0, *args: Any, **kw: Any) -> None: - 
super().__init__(*args, **kw) - self.group_id = ctypes.c_int(group_id) + def __init__(self, groupId=0, *args, **kw): + super(CtypesEvtAttr, self).__init__(*args, **kw) + self.groupId = ctypes.c_int(groupId) class EvtAttr: __slots__ = ['__c_evt_attr'] - def __init__(self, group_id: int=0) -> None: - self.__c_evt_attr = CtypesEvtAttr(group_id) + def __init__(self, groupId=0): + self.__c_evt_attr = CtypesEvtAttr(groupId) @property - def c_evt_attr(self) -> CtypesEvtAttr: + def c_evt_attr(self): return self.__c_evt_attr @property - def group_id(self) -> int: - return int(self.c_evt_attr.group_id) + def groupId(self): + return int(self.c_evt_attr.groupId) - @group_id.setter - def group_id(self, group_id: int) -> None: - self.c_evt_attr.group_id = ctypes.c_int(group_id) + @groupId.setter + def groupId(self, groupId): + self.c_evt_attr.groupId = ctypes.c_int(groupId) @classmethod - def from_c_evt_attr(cls, c_evt_attr: CtypesEvtAttr) -> 'EvtAttr': + def from_c_evt_attr(cls, c_evt_attr): evt_attr = cls() evt_attr.__c_evt_attr = c_evt_attr return evt_attr @@ -115,24 +115,24 @@ class CtypesPmuAttr(ctypes.Structure): ] def __init__(self, - evtList: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None, - evtAttr: List[int]=None, - sampleRate: int=0, - useFreq: bool=False, - excludeUser: bool=False, - excludeKernel: bool=False, - symbolMode: int=0, - callStack: bool=False, - blockedSample: bool=False, - dataFilter: int=0, - evFilter: int=0, - minLatency: int=0, - includeNewFork: bool=False, - branchSampleFilter: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + evtList=None, + pidList=None, + cpuList=None, + evtAttr=None, + sampleRate=0, + useFreq=False, + excludeUser=False, + excludeKernel=False, + symbolMode=0, + callStack=False, + blockedSample=False, + dataFilter=0, + evFilter=0, + minLatency=0, + includeNewFork=False, + branchSampleFilter=0, + *args, **kw): + super(CtypesPmuAttr, self).__init__(*args, **kw) if evtList: numEvt = 
len(evtList) @@ -183,26 +183,26 @@ class CtypesPmuAttr(ctypes.Structure): self.includeNewFork = includeNewFork -class PmuAttr: +class PmuAttr(object): __slots__ = ['__c_pmu_attr'] def __init__(self, - evtList: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None, - evtAttr: List[CtypesEvtAttr]=None, - sampleRate: int=0, - useFreq: bool=False, - excludeUser: bool=False, - excludeKernel: bool=False, - symbolMode: int=0, - callStack: bool=False, - blockedSample: bool=False, - dataFilter: int=0, - evFilter: int=0, - minLatency: int=0, - includeNewFork: bool=False, - branchSampleFilter: int=0) -> None: + evtList=None, + pidList=None, + cpuList=None, + evtAttr=None, + sampleRate=0, + useFreq=False, + excludeUser=False, + excludeKernel=False, + symbolMode=0, + callStack=False, + blockedSample=False, + dataFilter=0, + evFilter=0, + minLatency=0, + includeNewFork=False, + branchSampleFilter=0): self.__c_pmu_attr = CtypesPmuAttr( evtList=evtList, pidList=pidList, @@ -223,19 +223,19 @@ class PmuAttr: ) @property - def c_pmu_attr(self) -> CtypesPmuAttr: + def c_pmu_attr(self): return self.__c_pmu_attr @property - def numEvt(self) -> int: + def numEvt(self): return self.c_pmu_attr.numEvt @property - def evtList(self) -> List[str]: + def evtList(self): return [self.c_pmu_attr.evtList[i].decode(UTF_8) for i in range(self.numEvt)] @evtList.setter - def evtList(self, evtList: List[str]) -> None: + def evtList(self, evtList): if evtList: numEvt = len(evtList) self.c_pmu_attr.evtList = (ctypes.c_char_p * numEvt)(*[evt.encode(UTF_8) for evt in evtList]) @@ -245,15 +245,15 @@ class PmuAttr: self.c_pmu_attr.numEvt = ctypes.c_uint(0) @property - def numPid(self) -> int: + def numPid(self): return self.c_pmu_attr.numPid @property - def pidList(self) -> List[int]: + def pidList(self): return [self.c_pmu_attr.pidList[i] for i in range(self.numPid)] @pidList.setter - def pidList(self, pidList: List[int]) -> None: + def pidList(self, pidList): if pidList: numPid = len(pidList) 
self.c_pmu_attr.pidList = (ctypes.c_int * numPid)(*[pid for pid in pidList]) @@ -263,11 +263,11 @@ class PmuAttr: self.c_pmu_attr.numPid = ctypes.c_uint(0) @property - def evtAttr(self) -> List[CtypesEvtAttr]: + def evtAttr(self): return [self.c_pmu_attr.evtAttr[i] for i in range(len(self.c_pmu_attr.evtAttr))] @evtAttr.setter - def evtAttr(self, evtAttr: List[CtypesEvtAttr]) -> None: + def evtAttr(self, evtAttr): if evtAttr: numEvtAttr = len(evtAttr) self.c_pmu_attr.evtAttr = (CtypesEvtAttr * numEvtAttr)(*[CtypesEvtAttr(evt) for evt in evtAttr]) @@ -275,15 +275,15 @@ class PmuAttr: self.c_pmu_attr.evtAttr = None @property - def numCpu(self) -> int: + def numCpu(self): return self.c_pmu_attr.numCpu @property - def cpuList(self) -> List[int]: + def cpuList(self): return [self.c_pmu_attr.cpuList[i] for i in range(self.numCpu)] @cpuList.setter - def cpuList(self, cpuList: List[int]) -> None: + def cpuList(self, cpuList): if cpuList: numCpu = len(cpuList) self.c_pmu_attr.cpuList = (ctypes.c_int * numCpu)(*[cpu for cpu in cpuList]) @@ -293,109 +293,109 @@ class PmuAttr: self.c_pmu_attr.numCpu = ctypes.c_uint(0) @property - def sampleRate(self) -> int: + def sampleRate(self): if not self.useFreq: return self.c_pmu_attr.sampleRate.period else: return self.c_pmu_attr.sampleRate.freq @sampleRate.setter - def sampleRate(self, sampleRate: int) -> None: + def sampleRate(self, sampleRate): if not self.useFreq: self.c_pmu_attr.sampleRate.period = ctypes.c_uint(sampleRate) else: self.c_pmu_attr.sampleRate.freq = ctypes.c_uint(sampleRate) @property - def useFreq(self) -> bool: + def useFreq(self): return bool(self.c_pmu_attr.useFreq) @useFreq.setter - def useFreq(self, useFreq: bool) -> None: + def useFreq(self, useFreq): self.c_pmu_attr.useFreq = int(useFreq) @property - def excludeUser(self) -> bool: + def excludeUser(self): return bool(self.c_pmu_attr.excludeUser) @excludeUser.setter - def excludeUser(self, excludeUser: bool) -> None: + def excludeUser(self, excludeUser): 
self.c_pmu_attr.excludeUser = int(excludeUser) @property - def excludeKernel(self) -> bool: + def excludeKernel(self): return bool(self.c_pmu_attr.excludeKernel) @excludeKernel.setter - def excludeKernel(self, excludeKernel: bool) -> None: + def excludeKernel(self, excludeKernel): self.c_pmu_attr.excludeKernel = int(excludeKernel) @property - def symbolMode(self) -> int: + def symbolMode(self): return self.c_pmu_attr.symbolMode @symbolMode.setter - def symbolMode(self, symbolMode: int) -> None: + def symbolMode(self, symbolMode): self.c_pmu_attr.symbolMode = ctypes.c_uint(symbolMode) @property - def callStack(self) -> bool: + def callStack(self): return bool(self.c_pmu_attr.callStack) @callStack.setter - def callStack(self, callStack: bool) -> None: + def callStack(self, callStack): self.c_pmu_attr.callStack = int(callStack) @property - def blockedSample(self) -> bool: + def blockedSample(self): return bool(self.c_pmu_attr.blockedSample) @blockedSample.setter - def blockedSample(self, blockedSample: bool) -> None: + def blockedSample(self, blockedSample): self.c_pmu_attr.blockedSample = int(blockedSample) @property - def dataFilter(self) -> int: + def dataFilter(self): return self.c_pmu_attr.dataFilter @dataFilter.setter - def dataFilter(self, dataFilter: int) -> None: + def dataFilter(self, dataFilter): self.c_pmu_attr.dataFilter = ctypes.c_uint64(dataFilter) @property - def evFilter(self) -> int: + def evFilter(self): return self.c_pmu_attr.evFilter @evFilter.setter - def evFilter(self, evFilter: int) -> None: + def evFilter(self, evFilter): self.c_pmu_attr.evFilter = ctypes.c_uint(evFilter) @property - def minLatency(self) -> int: + def minLatency(self): return self.c_pmu_attr.minLatency @minLatency.setter - def minLatency(self, minLatency: int) -> None: + def minLatency(self, minLatency): self.c_pmu_attr.minLatency = ctypes.c_ulong(minLatency) @property - def includeNewFork(self) -> bool: + def includeNewFork(self): return bool(self.c_pmu_attr.includeNewFork) 
@includeNewFork.setter - def includeNewFork(self, includeNewFork: bool) -> None: + def includeNewFork(self, includeNewFork): self.c_pmu_attr.includeNewFork = int(includeNewFork) @property - def branchSampleFilter(self) -> int: + def branchSampleFilter(self): return self.c_pmu_attr.branchSampleFilter @branchSampleFilter.setter - def branchSampleFilter(self, branchSampleFilter: int) -> None: + def branchSampleFilter(self, branchSampleFilter): self.c_pmu_attr.branchSampleFilter = ctypes.c_ulong(branchSampleFilter) @classmethod - def from_c_pmu_data(cls, c_pmu_attr: CtypesPmuAttr) -> 'PmuAttr': + def from_c_pmu_data(cls, c_pmu_attr): pmu_attr = cls() pmu_attr.__c_pmu_attr = c_pmu_attr return pmu_attr @@ -413,10 +413,10 @@ class CtypesPmuDeviceAttr(ctypes.Structure): ] def __init__(self, - metric: int = 0, - bdf: str = None, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + metric=0, + bdf= None, + *args, **kw): + super(CtypesPmuDeviceAttr, self).__init__(*args, **kw) self.metric = ctypes.c_int(metric) if bdf: @@ -425,44 +425,44 @@ class CtypesPmuDeviceAttr(ctypes.Structure): self.bdf = None -class PmuDeviceAttr: +class PmuDeviceAttr(object): __slots__ = ['__c_pmu_device_attr'] def __init__(self, - metric: int = 0, - bdf: str = None) -> None: + metric=0, + bdf= None): self.__c_pmu_device_attr = CtypesPmuDeviceAttr( metric=metric, bdf=bdf ) @property - def c_pmu_device_attr(self) -> CtypesPmuDeviceAttr: + def c_pmu_device_attr(self): return self.__c_pmu_device_attr @property - def metric(self) -> int: + def metric(self): return self.c_pmu_device_attr.metric @metric.setter - def metric(self, metric: int) -> None: + def metric(self, metric): self.c_pmu_device_attr.metric = ctypes.c_int(metric) @property - def bdf(self) -> str: + def bdf(self): if self.c_pmu_device_attr.bdf: return self.c_pmu_device_attr.bdf.decode(UTF_8) return None @bdf.setter - def bdf(self, bdf: str) -> None: + def bdf(self, bdf): if bdf: self.c_pmu_device_attr.bdf = 
ctypes.c_char_p(bdf.encode(UTF_8)) else: self.c_pmu_device_attr.bdf = None @classmethod - def from_c_pmu_device_attr(cls, c_pmu_device_attr: CtypesPmuDeviceAttr) -> 'PmuDeviceAttr': + def from_c_pmu_device_attr(cls, c_pmu_device_attr): pmu_device_attr = cls() pmu_device_attr.__c_pmu_device_attr = c_pmu_device_attr return pmu_device_attr @@ -510,43 +510,43 @@ class CtypesPmuDeviceData(ctypes.Structure): ] @property - def coreId(self) -> int: + def coreId(self): if self.mode == 1: # PMU_METRIC_CORE return self._union.coreId return 0 @property - def numaId(self) -> int: + def numaId(self): if self.mode == 2: # PMU_METRIC_NUMA return self._union.numaId return 0 @property - def clusterId(self) -> int: + def clusterId(self): if self.mode == 3: # PMU_METRIC_CLUSTER return self._union.clusterId return 0 @property - def bdf(self) -> str: + def bdf(self): if self.mode == 4 and self._union.bdf: # PMU_METRIC_BDF return self._union.bdf.decode(UTF_8) return "" @property - def channelId(self) -> int: + def channelId(self): if self.mode == 5 and self._union._structure.channelId: # PMU_METRIC_CHANNEL return self._union._structure.channelId return 0 @property - def ddrNumaId(self) -> int: + def ddrNumaId(self): if self.mode == 5 and self._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL return self._union._structure.ddrNumaId return 0 @property - def socketId(self) -> int: + def socketId(self): if self.mode == 5 and self._union._structure.socketId: # PMU_METRIC_CHANNEL return self._union._structure.socketId return 0 @@ -555,74 +555,74 @@ class ImplPmuDeviceData: __slots__ = ['__c_pmu_device_data'] def __init__(self, - metric: int = 0, - count: float = 0, - mode: int = 0) -> None: + metric=0, + count=0, + mode=0): self.__c_pmu_device_data = CtypesPmuDeviceData() self.__c_pmu_device_data.metric = ctypes.c_int(metric) self.__c_pmu_device_data.count = ctypes.c_double(count) self.__c_pmu_device_data.mode = ctypes.c_int(mode) @property - def c_pmu_device_data(self) -> CtypesPmuDeviceData: 
+ def c_pmu_device_data(self): return self.__c_pmu_device_data @property - def metric(self) -> int: + def metric(self): return self.c_pmu_device_data.metric @property - def count(self) -> float: + def count(self): return self.c_pmu_device_data.count @property - def mode(self) -> int: + def mode(self): return self.c_pmu_device_data.mode @property - def coreId(self) -> int: + def coreId(self): if self.mode == 1: # PMU_METRIC_CORE return self.c_pmu_device_data._union.coreId return 0 @property - def numaId(self) -> int: + def numaId(self): if self.mode == 2: # PMU_METRIC_NUMA return self.c_pmu_device_data._union.numaId return 0 @property - def clusterId(self) -> int: + def clusterId(self): if self.mode == 3: # PMU_METRIC_CLUSTER return self.c_pmu_device_data._union.clusterId return 0 @property - def bdf(self) -> str: + def bdf(self): if self.mode == 4 and self.c_pmu_device_data._union.bdf: # PMU_METRIC_BDF return self.c_pmu_device_data._union.bdf.decode(UTF_8) return "" @property - def channelId(self) -> int: + def channelId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.channelId: # PMU_METRIC_CHANNEL return self.c_pmu_device_data._union._structure.channelId return 0 @property - def ddrNumaId(self) -> int: + def ddrNumaId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL return self.c_pmu_device_data._union._structure.ddrNumaId return 0 @property - def socketId(self) -> int: + def socketId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.socketId: # PMU_METRIC_CHANNEL return self.c_pmu_device_data._union._structure.socketId return 0 @classmethod - def from_c_pmu_device_data(cls, c_pmu_device_data: CtypesPmuDeviceData) -> 'ImplPmuDeviceData': + def from_c_pmu_device_data(cls, c_pmu_device_data): pmu_device_data = cls() pmu_device_data.__c_pmu_device_data = c_pmu_device_data return pmu_device_data @@ -631,15 +631,15 @@ class ImplPmuDeviceData: class PmuDeviceData: __slots__ = 
['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CtypesPmuDeviceData) = None, len: int = 0) -> None: - self.__pointer = pointer - self.__len = len + def __init__(self, pointer = None, len=0): + self.__pointer =pointer + self.__len =len self.__iter = (ImplPmuDeviceData.from_c_pmu_device_data(self.__pointer[i]) for i in range(self.__len)) - def __del__(self) -> None: + def __del__(self): self.free() - def __len__(self) -> int: + def __len__(self): return self.__len def __getitem__(self, index): @@ -648,14 +648,14 @@ class PmuDeviceData: return ImplPmuDeviceData.from_c_pmu_device_data(self.__pointer[index]) @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuDeviceData]: + def iter(self): return self.__iter - def free(self) -> None: + def free(self): if self.__pointer is not None: DevDataFree(self.__pointer) self.__pointer = None @@ -684,11 +684,11 @@ class CtypesPmuTraceAttr(ctypes.Structure): ] def __init__(self, - funcs: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None, - *args: Any, **kw:Any) -> None: - super().__init__(*args, **kw) + funcs=None, + pidList=None, + cpuList=None, + *args, **kw): + super(CtypesPmuTraceAttr, self).__init__(*args, **kw) if funcs: numFuncs = len(funcs) @@ -715,13 +715,13 @@ class CtypesPmuTraceAttr(ctypes.Structure): self.numCpu = ctypes.c_uint(0) -class PmuTraceAttr: +class PmuTraceAttr(object): __slots__ = ['__c_pmu_trace_attr'] def __init__(self, - funcs: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None) -> None: + funcs=None, + pidList=None, + cpuList=None): self.__c_pmu_trace_attr = CtypesPmuTraceAttr( funcs=funcs, pidList=pidList, @@ -729,19 +729,19 @@ class PmuTraceAttr: ) @property - def c_pmu_trace_attr(self) -> CtypesPmuTraceAttr: + def c_pmu_trace_attr(self): return self.__c_pmu_trace_attr @property - def numFuncs(self) -> int: + def numFuncs(self): return self.c_pmu_trace_attr.numFuncs @property 
- def funcs(self) -> List[str]: + def funcs(self): return [self.c_pmu_trace_attr.funcs[i].decode(UTF_8) for i in range(self.numFuncs)] @funcs.setter - def funcs(self, funcs: List[str]) -> None: + def funcs(self, funcs): if funcs: numFuncs = len(funcs) self.c_pmu_trace_attr.funcs = (ctypes.c_char_p * numFuncs)(*[func.encode(UTF_8) for func in funcs]) @@ -751,15 +751,15 @@ class PmuTraceAttr: self.c_pmu_trace_attr.numFuncs = ctypes.c_uint(0) @property - def numPid(self) -> int: + def numPid(self): return self.c_pmu_trace_attr.numPid @property - def pidList(self) -> List[int]: + def pidList(self): return [self.c_pmu_trace_attr.pidList[i] for i in range(self.numPid)] @pidList.setter - def pidList(self, pidList: List[int]) -> None: + def pidList(self, pidList): if pidList: numPid = len(pidList) self.c_pmu_trace_attr.pidList = (ctypes.c_int * numPid)(*[pid for pid in pidList]) @@ -769,15 +769,15 @@ class PmuTraceAttr: self.c_pmu_trace_attr.numPid = ctypes.c_uint(0) @property - def numCpu(self) -> int: + def numCpu(self): return self.c_pmu_trace_attr.numCpu @property - def cpuList(self) -> List[int]: + def cpuList(self): return [self.c_pmu_trace_attr.cpuList[i] for i in range(self.numCpu)] @cpuList.setter - def cpuList(self, cpuList: List[int]) -> None: + def cpuList(self, cpuList): if cpuList: numCpu = len(cpuList) self.c_pmu_trace_attr.cpuList = (ctypes.c_int * numCpu)(*[cpu for cpu in cpuList]) @@ -802,11 +802,11 @@ class CtypesCpuTopology(ctypes.Structure): ] def __init__(self, - coreId: int=0, - numaId: int=0, - socketId: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + coreId=0, + numaId=0, + socketId=0, + *args, **kw): + super(CtypesCpuTopology, self).__init__(*args, **kw) self.coreId = ctypes.c_int(coreId) self.numaId = ctypes.c_int(numaId) self.socketId = ctypes.c_int(socketId) @@ -816,9 +816,9 @@ class CpuTopology: __slots__ = ['__c_cpu_topo'] def __init__(self, - coreId: int=0, - numaId: int=0, - socketId: int=0) -> None: + coreId=0, + 
numaId=0, + socketId=0): self.__c_cpu_topo = CtypesCpuTopology( coreId=coreId, numaId=numaId, @@ -826,35 +826,35 @@ class CpuTopology: ) @property - def c_cpu_topo(self) -> CtypesCpuTopology: + def c_cpu_topo(self): return self.__c_cpu_topo @property - def coreId(self) -> int: + def coreId(self): return self.c_cpu_topo.coreId @coreId.setter - def coreId(self, coreId: int) -> None: + def coreId(self, coreId): self.c_cpu_topo.coreId = ctypes.c_int(coreId) @property - def numaId(self) -> int: + def numaId(self): return self.c_cpu_topo.numaId @numaId.setter - def numaId(self, numaId: int) -> None: + def numaId(self, numaId): self.c_cpu_topo.numaId = ctypes.c_int(numaId) @property - def socketId(self) -> int: + def socketId(self): return self.c_cpu_topo.socketId @socketId.setter - def socketId(self, socketId: int) -> None: + def socketId(self, socketId): self.c_cpu_topo.socketId = ctypes.c_int(socketId) @classmethod - def from_c_cpu_topo(cls, c_cpu_topo: CtypesCpuTopology) -> 'CpuTopology': + def from_c_cpu_topo(cls, c_cpu_topo): cpu_topo = cls() cpu_topo.__c_cpu_topo = c_cpu_topo return cpu_topo @@ -865,27 +865,27 @@ class CtypesSampleRawData(ctypes.Structure): ('data', ctypes.c_char_p) ] - def __init__(self, data: str='', *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + def __init__(self, data='', *args, **kw): + super(CtypesSampleRawData, self).__init__(*args, **kw) self.data = ctypes.c_char_p(data.encode(UTF_8)) class SampleRawData: __slots__ = ['__c_sample_rawdata'] - def __init__(self, data: str='') -> None: + def __init__(self, data=''): self.__c_sample_rawdata = CtypesSampleRawData(data) @property - def c_pmu_data_rawData(self) -> CtypesSampleRawData: + def c_pmu_data_rawData(self): return self.__c_sample_rawdata @property - def data(self) -> str: + def data(self): return self.__c_sample_rawdata.data.decode(UTF_8) @classmethod - def from_sample_raw_data(cls, c_sample_raw_data: CtypesSampleRawData) -> 'SampleRawData': + def 
from_sample_raw_data(cls, c_sample_raw_data): sample_raw_data = cls() sample_raw_data.__c_sample_rawdata = c_sample_raw_data return sample_raw_data @@ -910,9 +910,9 @@ class ImplBranchRecords(): __slots__ = ['__c_branch_record'] def __init__(self, - fromAddr: int=0, - toAddr: int=0, - cycles: int=0) -> None: + fromAddr=0, + toAddr=0, + cycles=0): self.__c_branch_record = CytpesBranchSampleRecord( fromAddr=fromAddr, toAddr=toAddr, @@ -920,23 +920,23 @@ class ImplBranchRecords(): ) @property - def c_branch_record(self) -> CytpesBranchSampleRecord: + def c_branch_record(self): return self.__c_branch_record @property - def fromAddr(self) -> int: + def fromAddr(self): return self.c_branch_record.fromAddr @property - def toAddr(self) -> int: + def toAddr(self): return self.c_branch_record.toAddr @property - def cycles(self) -> int: + def cycles(self): return self.c_branch_record.cycles @classmethod - def from_c_branch_record(cls, c_branch_record: CytpesBranchSampleRecord) -> 'ImplBranchRecords': + def from_c_branch_record(cls, c_branch_record): branch_record = cls() branch_record.__c_branch_record = c_branch_record return branch_record @@ -945,17 +945,17 @@ class ImplBranchRecords(): class BranchRecords(): __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CytpesBranchSampleRecord) = None, nr: int=0) -> None: + def __init__(self, pointer = None, nr=0): self.__pointer = pointer self.__len = nr self.__iter = (ImplBranchRecords.from_c_branch_record(self.__pointer[i]) for i in range(self.__len)) @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplBranchRecords]: + def iter(self): return self.__iter class CytpesSpeDataExt(ctypes.Structure): @@ -966,12 +966,12 @@ class CytpesSpeDataExt(ctypes.Structure): ('lat', ctypes.c_ushort), ] def __init__(self, - pa: int=0, - va: int=0, - event: int=0, - lat: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + pa=0, + 
va=0, + event=0, + lat=0, + *args, **kw): + super(CytpesSpeDataExt, self).__init__(*args, **kw) self.pa = ctypes.c_ulong(pa) self.va = ctypes.c_ulong(va) self.event = ctypes.c_ulong(event) @@ -1000,33 +1000,33 @@ class PmuDataExt: __slots__ = ['__c_pmu_data_ext'] @property - def c_pmu_data_ext(self) -> CtypesPmuDataExt: + def c_pmu_data_ext(self): return self.__c_pmu_data_ext @property - def pa(self) -> int: + def pa(self): return self.c_pmu_data_ext.ext.speDataExt.pa @property - def va(self) -> int: + def va(self): return self.c_pmu_data_ext.ext.speDataExt.va @property - def event(self) -> int: + def event(self): return self.c_pmu_data_ext.ext.speDataExt.event @property - def lat(self) -> int: + def lat(self): return self.c_pmu_data_ext.ext.speDataExt.lat @property - def branchRecords(self) -> BranchRecords: + def branchRecords(self): if self.__c_pmu_data_ext.ext.branchRecords.branchRecords: return BranchRecords(self.__c_pmu_data_ext.ext.branchRecords.branchRecords, self.__c_pmu_data_ext.ext.branchRecords.nr) return None @classmethod - def from_pmu_data_ext(cls, c_pmu_data_ext: CtypesPmuDataExt) -> 'PmuDataExt': + def from_pmu_data_ext(cls, c_pmu_data_ext): pmu_data_ext = cls() pmu_data_ext.__c_pmu_data_ext = c_pmu_data_ext return pmu_data_ext @@ -1051,13 +1051,13 @@ class CtypesSampleRawField(ctypes.Structure): ] def __init__(self, - field_name: str='', - field_str: str='', - offset: int=0, - size: int=0, - is_signed: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + field_name='', + field_str='', + offset=0, + size=0, + is_signed=0, + *args, **kw): + super(CtypesSampleRawField, self).__init__(*args, **kw) self.fieldName = ctypes.c_char_p(field_name.encode(UTF_8)) self.fieldStr = ctypes.c_char_p(field_str.encode(UTF_8)) self.offset = ctypes.c_uint(offset) @@ -1069,39 +1069,39 @@ class SampleRawField: __slots__ = ['__c_sample_raw_field'] def __init__(self, - field_name: str='', - field_str: str='', - offset: int=0, - size: int=0, - 
is_signed: int=0) -> None: + field_name='', + field_str='', + offset=0, + size=0, + is_signed=0): self.__c_sample_raw_field = CtypesSampleRawField(field_name, field_str, offset, size, is_signed) @property - def c_sample_raw_field(self) -> CtypesSampleRawField: + def c_sample_raw_field(self): return self.__c_sample_raw_field @property - def field_name(self) -> str: + def field_name(self): return self.__c_sample_raw_field.fieldName.decode(UTF_8) @property - def field_str(self) -> str: + def field_str(self): return self.__c_sample_raw_field.fieldStr.decode(UTF_8) @property - def size(self) -> int: + def size(self): return self.__c_sample_raw_field.size @property - def offset(self) -> int: + def offset(self): return self.__c_sample_raw_field.offset @property - def is_signed(self) -> bool: + def is_signed(self): return bool(self.__c_sample_raw_field.isSigned) @classmethod - def from_sample_raw_field(cls, __c_sample_raw_field: CtypesSampleRawField): + def from_sample_raw_field(cls, __c_sample_raw_field): sample_raw_data = cls() sample_raw_data.__c_sample_raw_field = __c_sample_raw_field return sample_raw_data @@ -1115,7 +1115,8 @@ class CtypesPmuData(ctypes.Structure): int64_t ts; // time stamp. 
unit: ns pid_t pid; // process id int tid; // thread id - unsigned cpu; // cpu id + int cpu; // cpu id + int groupId; // id for group event struct CpuTopology *cpuTopo; // cpu topology const char *comm; // process command uint64_t period; // number of Samples @@ -1132,6 +1133,7 @@ class CtypesPmuData(ctypes.Structure): ('pid', ctypes.c_int), ('tid', ctypes.c_int), ('cpu', ctypes.c_int), + ('groupId', ctypes.c_int), ('cpuTopo', ctypes.POINTER(CtypesCpuTopology)), ('comm', ctypes.c_char_p), ('period', ctypes.c_uint64), @@ -1142,21 +1144,22 @@ class CtypesPmuData(ctypes.Structure): ] def __init__(self, - stack: CtypesStack=None, - evt: str='', - ts: int=0, - pid: int=0, - tid: int=0, - cpu: int=0, - cpuTopo: CtypesCpuTopology=None, - comm: str='', - period: int=0, - count: int=0, - countPercent: float=0.0, - ext: CtypesPmuDataExt=None, - rawData: CtypesSampleRawData=None, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + stack=None, + evt='', + ts=0, + pid=0, + tid=0, + cpu=0, + groupId=0, + cpuTopo=None, + comm='', + period=0, + count=0, + countPercent=0.0, + ext=None, + rawData=None, + *args, **kw): + super(CtypesPmuData, self).__init__(*args, **kw) self.stack = stack self.evt = ctypes.c_char_p(evt.encode(UTF_8)) @@ -1164,6 +1167,7 @@ class CtypesPmuData(ctypes.Structure): self.pid = ctypes.c_int(pid) self.tid = ctypes.c_int(tid) self.cpu = ctypes.c_int(cpu) + self.groupId = ctypes.c_int(groupId) self.cpuTopo = cpuTopo self.comm = ctypes.c_char_p(comm.encode(UTF_8)) self.period = ctypes.c_uint64(period) @@ -1177,19 +1181,20 @@ class ImplPmuData: __slots__ = ['__c_pmu_data'] def __init__(self, - stack: Stack=None, - evt: str='', - ts: int=0, - pid: int=0, - tid: int=0, - cpu: int=0, - cpuTopo: CpuTopology=None, - comm: str='', - period: int=0, - count: int=0, - countPercent: float=0.0, - ext: PmuDataExt=None, - rawData: SampleRawData=None) -> None: + stack=None, + evt='', + ts=0, + pid=0, + tid=0, + cpu=0, + groupId=0, + cpuTopo=None, + comm='', + 
period=0, + count=0, + countPercent=0.0, + ext=None, + rawData=None): self.__c_pmu_data = CtypesPmuData( stack=stack.c_stack if stack else None, evt=evt, @@ -1197,6 +1202,7 @@ class ImplPmuData: pid=pid, tid=tid, cpu=cpu, + groupId=groupId, cpuTopo=cpuTopo.c_cpu_topo if cpuTopo else None, comm=comm, period=period, @@ -1207,111 +1213,119 @@ class ImplPmuData: ) @property - def c_pmu_data(self) -> CtypesPmuData: + def c_pmu_data(self): return self.__c_pmu_data @property - def stack(self) -> Stack: + def stack(self): return Stack.from_c_stack(self.c_pmu_data.stack.contents) if self.c_pmu_data.stack else None @stack.setter - def stack(self, stack: Stack) -> None: + def stack(self, stack): self.c_pmu_data.stack = stack.c_stack if stack else None @property - def evt(self) -> str: + def evt(self): return self.c_pmu_data.evt.decode(UTF_8) @evt.setter - def evt(self, evt: str) -> None: + def evt(self, evt): self.c_pmu_data.evt = ctypes.c_char_p(evt.encode(UTF_8)) @property - def ts(self) -> int: + def ts(self): return self.c_pmu_data.ts @ts.setter - def ts(self, ts: int) -> None: + def ts(self, ts): self.c_pmu_data.ts = ctypes.c_int64(ts) @property - def pid(self) -> int: + def pid(self): return self.c_pmu_data.pid @pid.setter - def pid(self, pid: int) -> None: + def pid(self, pid): self.c_pmu_data.pid = ctypes.c_int(pid) @property - def tid(self) -> int: + def tid(self): return self.c_pmu_data.tid @tid.setter - def tid(self, tid: int) -> None: + def tid(self, tid): self.c_pmu_data.tid = ctypes.c_int(tid) @property - def cpu(self) -> int: + def cpu(self): return self.c_pmu_data.cpu @cpu.setter - def cpu(self, cpu: int) -> None: + def cpu(self, cpu): self.c_pmu_data.cpu = ctypes.c_int(cpu) @property - def cpuTopo(self) -> CpuTopology: + def groupId(self): + return self.c_pmu_data.groupId + + @groupId.setter + def groupId(self, groupId): + self.c_pmu_data.groupId = ctypes.c_int(groupId) + + @property + def cpuTopo(self): return 
CpuTopology.from_c_cpu_topo(self.c_pmu_data.cpuTopo.contents) if self.c_pmu_data.cpuTopo else None @cpuTopo.setter - def cpuTopo(self, cpuTopo: CpuTopology) -> None: + def cpuTopo(self, cpuTopo): self.c_pmu_data.cpuTopo = cpuTopo.c_cpu_topo if cpuTopo else None @property - def comm(self) -> str: + def comm(self): return self.c_pmu_data.comm.decode(UTF_8) @comm.setter - def comm(self, comm: str) -> None: + def comm(self, comm): self.c_pmu_data.comm = ctypes.c_char_p(comm.encode(UTF_8)) @property - def period(self) -> int: + def period(self): return self.c_pmu_data.period @period.setter - def period(self, period: int) -> None: + def period(self, period): self.c_pmu_data.period = ctypes.c_uint64(period) @property - def count(self) -> int: + def count(self): return self.c_pmu_data.count @count.setter - def count(self, count: int) -> None: + def count(self, count): self.c_pmu_data.count = ctypes.c_uint64(count) @property - def countPercent(self) -> float: + def countPercent(self): return self.c_pmu_data.countPercent @countPercent.setter - def countPercent(self, countPercent: float) -> None: + def countPercent(self, countPercent): self.c_pmu_data.countPercent = ctypes.c_double(countPercent) @property - def ext(self) -> PmuDataExt: + def ext(self): return PmuDataExt.from_pmu_data_ext(self.c_pmu_data.ext.contents) if self.c_pmu_data.ext else None @property - def rawData(self) -> SampleRawData: + def rawData(self): return SampleRawData.from_sample_raw_data(self.c_pmu_data.rawData) if self.c_pmu_data.rawData else None @ext.setter - def ext(self, ext: PmuDataExt) -> None: + def ext(self, ext): self.c_pmu_data.ext = ext.c_pmu_data_ext if ext else None @classmethod - def from_c_pmu_data(cls, c_pmu_data: CtypesPmuData) -> 'ImplPmuData': + def from_c_pmu_data(cls, c_pmu_data): pmu_data = cls() pmu_data.__c_pmu_data = c_pmu_data return pmu_data @@ -1320,32 +1334,32 @@ class ImplPmuData: class PmuData: __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: 
ctypes.POINTER(CtypesPmuData) = None, len: int = 0) -> None: + def __init__(self, pointer = None, len=0): self.__pointer = pointer self.__len = len self.__iter = (ImplPmuData.from_c_pmu_data(self.__pointer[i]) for i in range(self.__len)) - def __del__(self) -> None: + def __del__(self): self.free() - def __len__(self) -> int: + def __len__(self): return self.__len def __iter__(self): return self.__iter - def pointer(self) -> ctypes.POINTER(CtypesPmuData): + def pointer(self): return self.__pointer @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuData]: + def iter(self): return self.__iter - def free(self) -> None: + def free(self): if self.__pointer is not None: PmuDataFree(self.__pointer) self.__pointer = None @@ -1373,15 +1387,15 @@ class CtypesPmuTraceData(ctypes.Structure): ] def __init__(self, - funcs: str = '', - startTs: int = 0, - elapsedTime: float = 0.0, - pid: int = 0, - tid: int = 0, - cpu: int = 0, - comm: str = '', - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + funcs= '', + startTs=0, + elapsedTime=0.0, + pid=0, + tid=0, + cpu=0, + comm= '', + *args, **kw): + super(CtypesPmuTraceData, self).__init__(*args, **kw) self.funcs = ctypes.c_char_p(funcs.encode(UTF_8)) self.startTs = ctypes.c_int64(startTs) @@ -1394,14 +1408,14 @@ class CtypesPmuTraceData(ctypes.Structure): class ImplPmuTraceData: __slots__ = ['__c_pmu_trace_data'] def __init__(self, - funcs: str = '', - startTs: int = 0, - elapsedTime: float = 0.0, - pid: int = 0, - tid: int = 0, - cpu: int = 0, - comm: str = '', - *args: Any, **kw: Any) -> None: + funcs= '', + startTs=0, + elapsedTime=0.0, + pid=0, + tid=0, + cpu=0, + comm= '', + *args, **kw): self.__c_pmu_trace_data = CtypesPmuTraceData( funcs=funcs, startTs=startTs, @@ -1413,67 +1427,67 @@ class ImplPmuTraceData: ) @property - def c_pmu_trace_data(self) -> CtypesPmuTraceData: + def c_pmu_trace_data(self): return self.__c_pmu_trace_data @property - def 
funcs(self) -> str: + def funcs(self): return self.__c_pmu_trace_data.funcs.decode(UTF_8) @funcs.setter - def funcs(self, funcs: str) -> None: + def funcs(self, funcs): self.__c_pmu_trace_data.funcs = ctypes.c_char_p(funcs.encode(UTF_8)) @property - def startTs(self) -> int: + def startTs(self): return self.__c_pmu_trace_data.startTs @startTs.setter - def startTs(self, startTs: int) -> None: + def startTs(self, startTs): self.__c_pmu_trace_data.startTs = ctypes.c_int64(startTs) @property - def elapsedTime(self) -> float: + def elapsedTime(self): return self.__c_pmu_trace_data.elapsedTime @elapsedTime.setter - def elapsedTime(self, elapsedTime: float) -> None: + def elapsedTime(self, elapsedTime): self.__c_pmu_trace_data.elapsedTime = ctypes.c_double(elapsedTime) @property - def pid(self) -> int: + def pid(self): return self.__c_pmu_trace_data.pid @pid.setter - def pid(self, pid: int) -> None: + def pid(self, pid): self.__c_pmu_trace_data.pid = ctypes.c_int(pid) @property - def tid(self) -> int: + def tid(self): return self.__c_pmu_trace_data.tid @tid.setter - def tid(self, tid: int) -> None: + def tid(self, tid): self.__c_pmu_trace_data.tid = ctypes.c_int(tid) @property - def cpu(self) -> int: + def cpu(self): return self.__c_pmu_trace_data.cpu @cpu.setter - def cpu(self, cpu: int) -> None: + def cpu(self, cpu): self.__c_pmu_trace_data.cpu = ctypes.c_int(cpu) @property - def comm(self) -> str: + def comm(self): return self.__c_pmu_trace_data.comm.decode(UTF_8) @comm.setter - def comm(self, comm: str) -> None: + def comm(self, comm): self.__c_pmu_trace_data.comm = ctypes.c_char_p(comm.encode(UTF_8)) @classmethod - def from_c_pmu_trace_data(cls, c_pmu_trace_data: CtypesPmuTraceData) -> 'ImplPmuTraceData': + def from_c_pmu_trace_data(cls, c_pmu_trace_data): pmu_trace_data = cls() pmu_trace_data.__c_pmu_trace_data = c_pmu_trace_data return pmu_trace_data @@ -1481,28 +1495,28 @@ class ImplPmuTraceData: class PmuTraceData: __slots__ = ['__pointer', '__iter', '__len'] - 
def __init__(self, pointer: ctypes.POINTER(CtypesPmuTraceData) = None, len: int = 0) -> None: + def __init__(self, pointer = None, len=0): self.__pointer = pointer self.__len = len self.__iter = (ImplPmuTraceData.from_c_pmu_trace_data(self.__pointer[i]) for i in range(self.__len)) - def __del__(self) -> None: + def __del__(self): self.free() @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuTraceData]: + def iter(self): return self.__iter - def free(self) -> None: + def free(self): if self.__pointer is not None: PmuTraceDataFree(self.__pointer) self.__pointer = None -def PmuOpen(collectType: int, pmuAttr: PmuAttr) -> int: +def PmuOpen(collectType, pmuAttr): """ int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr); """ @@ -1515,7 +1529,7 @@ def PmuOpen(collectType: int, pmuAttr: PmuAttr) -> int: return c_PmuOpen(c_collectType, ctypes.byref(pmuAttr.c_pmu_attr)) -def PmuEventListFree() -> None: +def PmuEventListFree(): """ void PmuEventListFree(); """ @@ -1526,7 +1540,7 @@ def PmuEventListFree() -> None: c_PmuEventListFree() -def PmuEventList(eventType: int) -> Iterator[str]: +def PmuEventList(eventType): """ const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt); """ @@ -1541,7 +1555,7 @@ def PmuEventList(eventType: int) -> Iterator[str]: return (eventList[i].decode(UTF_8) for i in range(c_numEvt.value)) -def PmuEnable(pd: int) -> int: +def PmuEnable(pd): """ int PmuEnable(int pd); """ @@ -1554,7 +1568,7 @@ def PmuEnable(pd: int) -> int: return c_PmuEnable(c_pd) -def PmuDisable(pd: int) -> int: +def PmuDisable(pd): """ int PmuDisable(int pd); """ @@ -1567,7 +1581,7 @@ def PmuDisable(pd: int) -> int: return c_PmuDisable(c_pd) -def PmuCollect(pd: int, milliseconds: int, interval: int) -> int: +def PmuCollect(pd, milliseconds, interval): """ int PmuCollect(int pd, int milliseconds, unsigned interval); """ @@ -1582,7 +1596,7 @@ def PmuCollect(pd: int, milliseconds: int, 
interval: int) -> int: return c_PmuCollect(c_pd, c_milliseconds, c_interval) -def PmuStop(pd: int) -> None: +def PmuStop(pd): """ void PmuStop(int pd); """ @@ -1595,7 +1609,7 @@ def PmuStop(pd: int) -> None: c_PmuStop(c_pd) -def PmuDataFree(pmuData: ctypes.POINTER(CtypesPmuData)) -> None: +def PmuDataFree(pmuData): """ void PmuDataFree(struct PmuData* pmuData); """ @@ -1605,7 +1619,7 @@ def PmuDataFree(pmuData: ctypes.POINTER(CtypesPmuData)) -> None: c_PmuDataFree(pmuData) -def PmuRead(pd: int) -> PmuData: +def PmuRead(pd): """ int PmuRead(int pd, struct PmuData** pmuData); """ @@ -1619,7 +1633,7 @@ def PmuRead(pd: int) -> PmuData: c_data_len = c_PmuRead(c_pd, ctypes.byref(c_data_pointer)) return PmuData(c_data_pointer, c_data_len) -def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: +def ResolvePmuDataSymbol(pmuData): """ int ResolvePmuDataSymbol(struct PmuData* pmuData); """ @@ -1629,8 +1643,7 @@ def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: return c_ResolvePmuDataSymbol(pmuData) -def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), - toData: ctypes.POINTER(ctypes.POINTER(CtypesPmuData))) -> int: +def PmuAppendData(fromData, toData): """ int PmuAppendData(struct PmuData *fromData, struct PmuData **toData); """ @@ -1641,7 +1654,7 @@ def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), return c_PmuAppendData(fromData, toData) -def PmuClose(pd: int) -> None: +def PmuClose(pd): """ void PmuClose(int pd); """ @@ -1654,23 +1667,22 @@ def PmuClose(pd: int) -> None: c_PmuClose(c_pd) -def PmuDumpData(pmuData: PmuData, filepath: str, dumpDwf: int) -> None: +def PmuDumpData(pmuData, filepath, dumpDwf): """ int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpDwf); """ c_PmuDumpData = kperf_so.PmuDumpData - c_PmuDumpData.argtypes = [ctypes.POINTER(CtypesPmuData), ctypes.c_uint, ctypes.c_char_p, ctypes] + c_PmuDumpData.argtypes = [ctypes.POINTER(CtypesPmuData), ctypes.c_uint, 
ctypes.c_char_p, ctypes.c_int] c_PmuDumpData.restype = ctypes.c_int c_len = ctypes.c_uint(pmuData.len) c_filepath = ctypes.c_char_p(filepath.encode(UTF_8)) c_dumpDwf = ctypes.c_int(dumpDwf) - c_PmuDumpData(pmuData.pointer, c_len, c_filepath, c_dumpDwf) + c_PmuDumpData(pmuData.pointer(), c_len, c_filepath, c_dumpDwf) -def PmuGetField(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str, value: ctypes.c_void_p, - vSize: int) -> int: +def PmuGetField(rawData, field_name, value, vSize): """ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *value, uint32_t vSize); """ @@ -1681,7 +1693,7 @@ def PmuGetField(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str, v return c_PmuGetField(rawData, field_name.encode(UTF_8), value, vSize) -def PmuGetFieldExp(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str) -> SampleRawField: +def PmuGetFieldExp(rawData, field_name): """ SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName); """ @@ -1694,7 +1706,7 @@ def PmuGetFieldExp(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str return SampleRawField.from_sample_raw_field(pointer_field.contents) -def PmuDeviceBdfListFree() -> None: +def PmuDeviceBdfListFree(): """ void PmuDeviceBdfListFree() """ @@ -1704,7 +1716,7 @@ def PmuDeviceBdfListFree() -> None: c_PmuDeviceBdfListFree() -def PmuDeviceBdfList(bdf_type: int) -> Iterator[str]: +def PmuDeviceBdfList(bdf_type): """ const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf); """ @@ -1717,10 +1729,10 @@ def PmuDeviceBdfList(bdf_type: int) -> Iterator[str]: c_bdf_list = c_PmuDeviceBdfList(c_bdf_type, ctypes.byref(c_num_bdf)) - return [c_bdf_list[i].decode(UTF_8) for i in range(c_num_bdf.value)] + return [c_bdf_list.decode(UTF_8) for i in range(c_num_bdf.value)] -def PmuDeviceOpen(device_attr: List[PmuDeviceAttr]) -> int: +def PmuDeviceOpen(device_attr): """ int PmuDeviceOpen(struct PmuDeviceAttr *deviceAttr, unsigned len); """ @@ 
-1732,7 +1744,7 @@ def PmuDeviceOpen(device_attr: List[PmuDeviceAttr]) -> int: return c_PmuDeviceOpen(c_device_attr, c_num_device) -def PmuGetDevMetric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> PmuDeviceData: +def PmuGetDevMetric(pmu_data, device_attr): """ int PmuGetDevMetric(struct PmuData *pmuData, unsigned pmuLen, struct PmuDeviceAttr *deviceAttr, unsigned len, struct PmuDeviceData *devicedata); @@ -1750,12 +1762,12 @@ def PmuGetDevMetric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> PmuD c_device_data = ctypes.POINTER(CtypesPmuDeviceData)() res = c_PmuGetDevMetric(pmu_data.pointer(), len(pmu_data), c_device_attr, num_device, ctypes.byref(c_device_data)) - if res <= 0: + if res <=0: return PmuDeviceData() return PmuDeviceData(c_device_data, res) -def DevDataFree(dev_data: ctypes.POINTER(CtypesPmuDeviceData)) -> None: +def DevDataFree(dev_data): """ void DevDataFree(struct PmuDeviceData *devData); """ @@ -1765,7 +1777,7 @@ def DevDataFree(dev_data: ctypes.POINTER(CtypesPmuDeviceData)) -> None: c_DevDataFree(dev_data) -def PmuGetCpuFreq(core: int) -> int: +def PmuGetCpuFreq(core): """ Get CPU frequency of a specific CPU core. @@ -1781,7 +1793,7 @@ def PmuGetCpuFreq(core: int) -> int: c_PmuGetCpuFreq.restype = ctypes.c_longlong return c_PmuGetCpuFreq(core) -def PmuGetClusterCore(clusterId: int) -> List[int]: +def PmuGetClusterCore(clusterId): """ Get CPU core list of a specific cluster. @@ -1800,7 +1812,7 @@ def PmuGetClusterCore(clusterId: int) -> List[int]: return [c_core_list[i] for i in range(c_num_core)] -def PmuGetNumaCore(numaId: int) -> List[int]: +def PmuGetNumaCore(numaId): """ Get CPU core list of a specific NUMA node. 
@@ -1819,7 +1831,7 @@ def PmuGetNumaCore(numaId: int) -> List[int]: return [c_core_list[i] for i in range(c_num_core)] -def PmuTraceOpen(traceType: int, pmuTraceAttr: PmuTraceAttr) -> int: +def PmuTraceOpen(traceType, pmuTraceAttr): """ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr); """ @@ -1831,7 +1843,7 @@ def PmuTraceOpen(traceType: int, pmuTraceAttr: PmuTraceAttr) -> int: return c_PmuTraceOpen(c_traceType, ctypes.byref(pmuTraceAttr.c_pmu_trace_attr)) -def PmuTraceEnable(pd: int) -> int: +def PmuTraceEnable(pd): """ int PmuTraceEnable(int pd); """ @@ -1843,7 +1855,7 @@ def PmuTraceEnable(pd: int) -> int: return c_PmuTraceEnable(c_pd) -def PmuTraceDisable(pd: int) -> int: +def PmuTraceDisable(pd): """ int PmuTraceDisable(int pd); """ @@ -1855,7 +1867,7 @@ def PmuTraceDisable(pd: int) -> int: return c_PmuTraceDisable(c_pd) -def PmuTraceRead(pd: int) -> PmuTraceData: +def PmuTraceRead(pd): """ int PmuTraceRead(int pd, struct PmuTraceData** pmuTraceData); """ @@ -1869,7 +1881,7 @@ def PmuTraceRead(pd: int) -> PmuTraceData: c_data_len = c_PmuTraceRead(c_pd, ctypes.byref(c_data_pointer)) return PmuTraceData(c_data_pointer, c_data_len) -def PmuTraceClose(pd: int) -> None: +def PmuTraceClose(pd): """ void PmuTraceClose(int pd); """ @@ -1881,7 +1893,7 @@ def PmuTraceClose(pd: int) -> None: c_PmuTraceClose(c_pd) -def PmuTraceDataFree(pmuTraceData: ctypes.POINTER(CtypesPmuTraceData)) -> None: +def PmuTraceDataFree(pmuTraceData): """ void PmuTraceDataFree(struct PmuTraceData* pmuTraceData); """ @@ -1890,7 +1902,7 @@ def PmuTraceDataFree(pmuTraceData: ctypes.POINTER(CtypesPmuTraceData)) -> None: c_PmuTraceDataFree.restype = None c_PmuTraceDataFree(pmuTraceData) -def PmuSysCallFuncList() -> Iterator[str]: +def PmuSysCallFuncList(): """ char **PmuSysCallFuncList(unsigned *numFunc); """ @@ -1903,7 +1915,7 @@ def PmuSysCallFuncList() -> Iterator[str]: return (c_func_list[i].decode(UTF_8) for i in range(c_num_func.value)) -def 
PmuSysCallFuncListFree() -> None: +def PmuSysCallFuncListFree(): """ void PmuSysCallFuncListFree(); """ @@ -1930,12 +1942,12 @@ class CtypesPmuCpuFreqDetail(ctypes.Structure): ] def __init__(self, - cpuId: int = 0, - minFreq: int = 0, - maxFreq: int = 0, - avgFreq: int = 0, - *args:Any, **kw: Any) -> None: - super().__init__(*args, **kw) + cpuId=0, + minFreq=0, + maxFreq=0, + avgFreq=0, + *args, **kw): + super(CtypesPmuCpuFreqDetail, self).__init__(*args, **kw) self.cpuId = ctypes.c_int(cpuId) self.minFreq = ctypes.c_uint64(minFreq) self.maxFreq = ctypes.c_uint64(maxFreq) @@ -1945,11 +1957,11 @@ class CtypesPmuCpuFreqDetail(ctypes.Structure): class ImplPmuCpuFreqDetail: __slots__ = ['__c_pmu_cpu_freq_detail'] def __init__(self, - cpuId: int = 0, - minFreq: int = 0, - maxFreq: int = 0, - avgFreq: int = 0, - *args:Any, **kw: Any) -> None: + cpuId=0, + minFreq=0, + maxFreq=0, + avgFreq=0, + *args, **kw): self.__c_pmu_cpu_freq_detail = CtypesPmuCpuFreqDetail( cpuId=cpuId, minFreq=minFreq, @@ -1958,43 +1970,43 @@ class ImplPmuCpuFreqDetail: ) @property - def c_pmu_cpu_freq_detail(self) -> CtypesPmuCpuFreqDetail: + def c_pmu_cpu_freq_detail(self): return self.__c_pmu_cpu_freq_detail @property - def cpuId(self) -> int: + def cpuId(self): return self.__c_pmu_cpu_freq_detail.cpuId @cpuId.setter - def cpuId(self, cpuId: int) -> None: + def cpuId(self, cpuId): self.__c_pmu_cpu_freq_detail.cpuId = ctypes.c_int(cpuId) @property - def minFreq(self) -> int: + def minFreq(self): return self.__c_pmu_cpu_freq_detail.minFreq @minFreq.setter - def minFreq(self, minFreq: int) -> None: + def minFreq(self, minFreq): self.__c_pmu_cpu_freq_detail.minFreq = ctypes.c_uint64(minFreq) @property - def maxFreq(self) -> int: + def maxFreq(self): return self.__c_pmu_cpu_freq_detail.maxFreq @maxFreq.setter - def maxFreq(self, maxFreq: int) -> None: + def maxFreq(self, maxFreq): self.__c_pmu_cpu_freq_detail.maxFreq = ctypes.c_uint64(maxFreq) @property - def avgFreq(self) -> int: + def avgFreq(self): 
return self.__c_pmu_cpu_freq_detail.avgFreq @avgFreq.setter - def avgFreq(self, avgFreq: int) -> None: + def avgFreq(self, avgFreq): self.__c_pmu_cpu_freq_detail.avgFreq = ctypes.c_uint64(avgFreq) @classmethod - def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail: CtypesPmuCpuFreqDetail) -> 'ImplPmuCpuFreqDetail': + def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail): freq_detail = cls() freq_detail.__c_pmu_cpu_freq_detail = c_pmu_cpu_freq_detail return freq_detail @@ -2003,21 +2015,21 @@ class ImplPmuCpuFreqDetail: class PmuCpuFreqDetail: __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CtypesPmuCpuFreqDetail) = None, len: int = 0) -> None: + def __init__(self, pointer=None, len=0): self.__pointer = pointer self.__len = len self.__iter = (ImplPmuCpuFreqDetail.from_c_pmu_cpu_freq_detail(self.__pointer[i]) for i in range(self.__len)) @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuCpuFreqDetail]: + def iter(self): return self.__iter -def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: +def PmuReadCpuFreqDetail(): """ struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); """ @@ -2029,7 +2041,7 @@ def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: return PmuCpuFreqDetail(c_freq_detail_pointer, c_cpu_len.value) -def PmuOpenCpuFreqSampling(period: int) -> None: +def PmuOpenCpuFreqSampling(period): """ int PmuOpenCpuFreqSampling(unsigned period); """ @@ -2038,7 +2050,7 @@ def PmuOpenCpuFreqSampling(period: int) -> None: c_period = ctypes.c_uint(period) return c_PmuOpenCpuFreqSampling(c_period) -def PmuCloseCpuFreqSampling() -> None: +def PmuCloseCpuFreqSampling(): """ void PmuCloseCpuFreqSampling(); """ diff --git a/python/modules/_libkperf/Symbol.py b/python/modules/_libkperf/Symbol.py index 705f1a317cf43e5c8cc61bc826651d81f1a2832c..f18b9e9ed86a1b05c23456085fd5141fc6ccd52b 100644 --- a/python/modules/_libkperf/Symbol.py +++ 
b/python/modules/_libkperf/Symbol.py @@ -47,18 +47,18 @@ class CtypesSymbol(ctypes.Structure): ] def __init__(self, - addr: int = 0, - module: str = '', - symbolName: str = '', - mangleName: str = '', - fileName: str = '', - lineNum: int = 0, - offset: int = 0, - codeMapEndAddr: int = 0, - codeMapAddr: int = 0, - count: int = 0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + addr= 0, + module= '', + symbolName= '', + mangleName= '', + fileName= '', + lineNum= 0, + offset= 0, + codeMapEndAddr= 0, + codeMapAddr= 0, + count= 0, + *args, **kw): + super(CtypesSymbol, self).__init__(*args, **kw) self.addr = ctypes.c_ulong(addr) self.module = ctypes.c_char_p(module.encode(UTF_8)) @@ -79,16 +79,16 @@ class Symbol: __slots__ = ['__c_sym'] def __init__(self, - addr: int = 0, - module: str = '', - symbolName: str = '', - mangleName: str = '', - fileName: str = '', - lineNum: int = 0, - offset: int = 0, - codeMapEndAddr: int = 0, - codeMapAddr: int = 0, - count: int = 0) -> None: + addr= 0, + module= '', + symbolName= '', + mangleName= '', + fileName= '', + lineNum= 0, + offset= 0, + codeMapEndAddr= 0, + codeMapAddr= 0, + count= 0): self.__c_sym = CtypesSymbol( addr=addr, module=module, @@ -103,91 +103,91 @@ class Symbol: ) @property - def c_sym(self) -> CtypesSymbol: + def c_sym(self): return self.__c_sym @property - def addr(self) -> int: + def addr(self): return self.c_sym.addr @addr.setter - def addr(self, addr: int) -> None: + def addr(self, addr): self.c_sym.addr = ctypes.c_ulong(addr) @property - def module(self) -> str: + def module(self): return self.c_sym.module.decode(UTF_8) @module.setter - def module(self, module: str) -> None: + def module(self, module): self.c_sym.module = ctypes.c_char_p(module.encode(UTF_8)) @property - def symbolName(self) -> str: + def symbolName(self): return self.c_sym.symbolName.decode(UTF_8) @symbolName.setter - def symbolName(self, symbolName: str) -> None: + def symbolName(self, symbolName): self.c_sym.symbolName = 
ctypes.c_char_p(symbolName.encode(UTF_8)) @property - def mangleName(self) -> str: + def mangleName(self): return self.c_sym.mangleName.decode(UTF_8) @mangleName.setter - def mangleName(self, mangleName: str) -> None: + def mangleName(self, mangleName): self.c_sym.mangleName = ctypes.c_char_p(mangleName.encode(UTF_8)) @property - def fileName(self) -> str: + def fileName(self): return self.c_sym.fileName.decode(UTF_8) @fileName.setter - def fileName(self, fileName: str) -> None: + def fileName(self, fileName): self.c_sym.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @property - def lineNum(self) -> int: + def lineNum(self): return self.c_sym.lineNum @lineNum.setter - def lineNum(self, lineNum: int) -> None: + def lineNum(self, lineNum): self.c_sym.lineNum = ctypes.c_uint(lineNum) @property - def offset(self) -> int: + def offset(self): return self.c_sym.offset @offset.setter - def offset(self, offset: int) -> None: + def offset(self, offset): self.c_sym.offset = ctypes.c_ulong(offset) @property - def codeMapEndAddr(self) -> int: + def codeMapEndAddr(self): return self.c_sym.codeMapEndAddr @codeMapEndAddr.setter - def codeMapEndAddr(self, codeMapEndAddr: int) -> None: + def codeMapEndAddr(self, codeMapEndAddr): self.c_sym.codeMapEndAddr = ctypes.c_ulong(codeMapEndAddr) @property - def codeMapAddr(self) -> int: + def codeMapAddr(self): return self.c_sym.codeMapAddr @codeMapAddr.setter - def codeMapAddr(self, codeMapAddr: int) -> None: + def codeMapAddr(self, codeMapAddr): self.c_sym.codeMapAddr = ctypes.c_ulong(codeMapAddr) @property - def count(self) -> int: + def count(self): return self.c_sym.count @count.setter - def count(self, count: int) -> None: + def count(self, count): self.c_sym.count = ctypes.c_uint64(count) @classmethod - def from_c_sym(cls, c_sym: CtypesSymbol) -> 'Symbol': + def from_c_sym(cls, c_sym): symbol = cls() symbol.__c_sym = c_sym return symbol @@ -213,15 +213,15 @@ CtypesStack._fields_ = [ ] -class Stack: +class Stack(object): __slots__ 
= ['__c_stack'] def __init__(self, - symbol: Symbol = None, - next: 'Stack' = None, - prev: 'Stack' = None, - count: int = 0) -> None: + symbol= None, + next = None, + prev = None, + count= 0): self.__c_stack = CtypesStack( symbol=symbol.c_sym if symbol else None, next=next.c_stack if next else None, @@ -230,45 +230,45 @@ class Stack: ) @property - def c_stack(self) -> CtypesStack: + def c_stack(self): return self.__c_stack @property - def symbol(self) -> Symbol: + def symbol(self): return Symbol.from_c_sym(self.c_stack.symbol.contents) if self.c_stack.symbol else None @symbol.setter - def symbol(self, symbol: Symbol) -> None: + def symbol(self, symbol): self.c_stack.symbol = symbol.c_sym if symbol else None @property - def next(self) -> 'Stack': + def next(self): return self.from_c_stack(self.c_stack.next.contents) if self.c_stack.next else None @next.setter - def next(self, next: 'Stack') -> None: + def next(self, next): self.c_stack.next = next.c_stack if next else None @property - def prev(self) -> 'Stack': + def prev(self): return self.from_c_stack(self.c_stack.prev.contents) if self.c_stack.prev else None @prev.setter - def prev(self, prev: 'Stack') -> None: + def prev(self, prev): self.c_stack.prev = prev.c_stack if prev else None @property - def count(self) -> int: + def count(self): return self.c_stack.count @count.setter - def count(self, count: int) -> None: + def count(self, count): self.c_stack.count = ctypes.c_uint64(count) @classmethod - def from_c_stack(cls, c_stack: CtypesStack) -> 'Stack': + def from_c_stack(cls, c_stack): stack = cls() stack.__c_stack = c_stack return stack @@ -292,12 +292,12 @@ class CtypesAsmCode(ctypes.Structure): ] def __init__(self, - addr: int = 0, - code: str = '', - fileName: str = '', - lineNum: int = 0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + addr= 0, + code= '', + fileName= '', + lineNum= 0, + *args, **kw): + super(CtypesAsmCode, self).__init__(*args, **kw) self.addr = ctypes.c_ulong(addr) 
self.code = ctypes.c_char_p(code.encode(UTF_8)) self.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @@ -309,10 +309,10 @@ class AsmCode: __slots__ = ['__c_asm_code'] def __init__(self, - addr: int = 0, - code: str = '', - fileName: str = '', - lineNum: int = 0) -> None: + addr= 0, + code= '', + fileName= '', + lineNum= 0): self.__c_asm_code = CtypesAsmCode( addr=addr, code=code, @@ -321,43 +321,43 @@ class AsmCode: ) @property - def c_asm_code(self) -> CtypesAsmCode: + def c_asm_code(self): return self.__c_asm_code @property - def addr(self) -> int: + def addr(self): return self.c_asm_code.addr @addr.setter - def addr(self, addr: int) -> None: + def addr(self, addr): self.c_asm_code.addr = ctypes.c_ulong(addr) @property - def code(self) -> str: + def code(self): return self.c_asm_code.code.decode(UTF_8) @code.setter - def code(self, code: str) -> None: + def code(self, code): self.c_asm_code.code = ctypes.c_char_p(code.encode(UTF_8)) @property - def fileName(self) -> str: + def fileName(self): return self.c_asm_code.fileName.decode(UTF_8) @fileName.setter - def fileName(self, fileName: str) -> None: + def fileName(self, fileName): self.c_asm_code.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @property - def lineNum(self) -> int: + def lineNum(self): return self.c_asm_code.lineNum @lineNum.setter - def lineNum(self, lineNum: int) -> None: + def lineNum(self, lineNum): self.c_asm_code.lineNum = ctypes.c_uint(lineNum) @classmethod - def from_c_asm_code(cls, c_asm_code: CtypesAsmCode) -> 'AsmCode': + def from_c_asm_code(cls, c_asm_code): asm_code = cls() asm_code.__c_asm_code = c_asm_code return asm_code @@ -383,13 +383,13 @@ class CtypesStackAsm(ctypes.Structure): ] def __init__(self, - fileName: str = '', - funcStartAddr: int = 0, - functFileOffset: int = 0, - next: 'CtypesStackAsm' = None, - asmCode: CtypesAsmCode = None, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + fileName= '', + funcStartAddr= 0, + functFileOffset= 0, + next = 
None, + asmCode= None, + *args, **kw): + super(CtypesStackAsm, self).__init__(*args, **kw) self.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) self.funcStartAddr = ctypes.c_ulong(funcStartAddr) self.functFileOffset = ctypes.c_ulong(functFileOffset) @@ -402,11 +402,11 @@ class StackAsm: __slots__ = ['__c_stack_asm'] def __init__(self, - fileName: str = '', - funcStartAddr: int = 0, - functFileOffset: int = 0, - next: 'StackAsm' = None, - asmCode: AsmCode = None) -> None: + fileName= '', + funcStartAddr= 0, + functFileOffset= 0, + next = None, + asmCode= None): self.__c_stack_asm = CtypesStackAsm( fileName=fileName, funcStartAddr=funcStartAddr, @@ -416,51 +416,51 @@ class StackAsm: ) @property - def c_stack_asm(self) -> CtypesStackAsm: + def c_stack_asm(self): return self.__c_stack_asm @property - def fileName(self) -> str: + def fileName(self): return self.c_stack_asm.fileName.decode(UTF_8) @fileName.setter - def fileName(self, fileName: str) -> None: + def fileName(self, fileName): self.c_stack_asm.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @property - def funcStartAddr(self) -> int: + def funcStartAddr(self): return self.c_stack_asm.funcStartAddr @funcStartAddr.setter - def funcStartAddr(self, funcStartAddr: int) -> None: + def funcStartAddr(self, funcStartAddr): self.c_stack_asm.funcStartAddr = ctypes.c_ulong(funcStartAddr) @property - def functFileOffset(self) -> int: + def functFileOffset(self): return self.c_stack_asm.functFileOffset @functFileOffset.setter - def functFileOffset(self, functFileOffset: int) -> None: + def functFileOffset(self, functFileOffset): self.c_stack_asm.functFileOffset = ctypes.c_ulong(functFileOffset) @property - def next(self) -> 'StackAsm': + def next(self): return self.from_c_stack_asm(self.c_stack_asm.next.contents) if self.c_stack_asm.next else None @next.setter - def next(self, next: 'StackAsm') -> None: + def next(self, next): self.c_stack_asm.next = next.c_stack_asm if next else None @property - def asmCode(self) -> 
AsmCode: + def asmCode(self): return AsmCode.from_c_asm_code(self.c_stack_asm.asmCode.contents) if self.c_stack_asm.asmCode else None @asmCode.setter - def asmCode(self, asmCode: AsmCode) -> None: + def asmCode(self, asmCode): self.c_stack_asm.asmCode = asmCode.c_asm_code if asmCode else None @classmethod - def from_c_stack_asm(cls, c_stack_asm: CtypesStackAsm) -> 'StackAsm': + def from_c_stack_asm(cls, c_stack_asm): stack_asm = cls() stack_asm.__c_stack_asm = c_stack_asm return stack_asm @@ -492,15 +492,15 @@ class CtypesProcTopology(ctypes.Structure): ] def __init__(self, - pid: int = 0, - tid: int = 0, - ppid: int = 0, - childPid: List[int] = None, - comm: str = '', - exe: str = '', - kernel: bool = False, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + pid= 0, + tid= 0, + ppid= 0, + childPid= None, + comm= '', + exe= '', + kernel= False, + *args, **kw): + super(CtypesProcTopology, self).__init__(*args, **kw) self.pid = ctypes.c_int(pid) self.tid = ctypes.c_int(tid) self.ppid = ctypes.c_int(ppid) @@ -521,13 +521,13 @@ class ProcTopology: __slots__ = ['__c_proc_topology'] def __init__(self, - pid: int = 0, - tid: int = 0, - ppid: int = 0, - childPid: List[int] = None, - comm: str = '', - exe: str = '', - kernel: bool = False) -> None: + pid= 0, + tid= 0, + ppid= 0, + childPid= None, + comm= '', + exe= '', + kernel= False): self.__c_proc_topology = CtypesProcTopology( pid = pid, tid=tid, @@ -539,44 +539,44 @@ class ProcTopology: ) @property - def c_proc_topology(self) -> CtypesProcTopology: + def c_proc_topology(self): return self.__c_proc_topology @property - def pid(self) -> int: + def pid(self): return self.c_proc_topology.pid @pid.setter - def pid(self, pid: int) -> None: + def pid(self, pid): self.c_proc_topology.pid = ctypes.c_int(pid) @property - def tid(self) -> int: + def tid(self): return self.c_proc_topology.tid @tid.setter - def tid(self, tid: int) -> None: + def tid(self, tid): self.c_proc_topology.tid = ctypes.c_int(tid) @property 
- def ppid(self) -> int: + def ppid(self): return self.c_proc_topology.ppid @ppid.setter - def ppid(self, ppid: int) -> None: + def ppid(self, ppid): self.c_proc_topology.ppid = ctypes.c_int(ppid) @property - def numChild(self) -> int: + def numChild(self): return self.c_proc_topology.numChild @property - def childPid(self) -> List[int]: + def childPid(self): return [self.c_proc_topology.childPid[i] for i in range(self.numChild)] @childPid.setter - def childPid(self, childPid: List[int]) -> None: + def childPid(self, childPid): if childPid: numChildPid = len(childPid) self.c_proc_topology.childPid = (ctypes.c_int * numChildPid)(*childPid) @@ -586,29 +586,29 @@ class ProcTopology: self.c_proc_topology.numChild = ctypes.c_int(0) @property - def comm(self) -> str: + def comm(self): return self.c_proc_topology.comm.decode(UTF_8) @comm.setter - def comm(self, comm: str) -> None: + def comm(self, comm): self.c_proc_topology.comm = ctypes.c_char_p(comm.encode(UTF_8)) @property - def exe(self) -> str: + def exe(self): return self.c_proc_topology.exe.decode(UTF_8) @exe.setter - def exe(self, exe: str) -> None: + def exe(self, exe): self.c_proc_topology.exe = ctypes.c_char_p(exe.encode(UTF_8)) @classmethod - def from_c_proc_topology(cls, c_proc_topology: CtypesProcTopology) -> 'ProcTopology': + def from_c_proc_topology(cls, c_proc_topology): proc_topology = cls() proc_topology.__c_proc_topology = c_proc_topology return proc_topology -def SymResolverRecordKernel() -> None: +def SymResolverRecordKernel(): """ int SymResolverRecordKernel(); """ @@ -619,7 +619,7 @@ def SymResolverRecordKernel() -> None: c_SymResolverRecordKernel() -def SymResolverRecordModule(pid: int) -> None: +def SymResolverRecordModule(pid): """ int SymResolverRecordModule(int pid); """ @@ -632,7 +632,7 @@ def SymResolverRecordModule(pid: int) -> None: c_SymResolverRecordModule(c_pid) -def SymResolverRecordModuleNoDwarf(pid: int) -> None: +def SymResolverRecordModuleNoDwarf(pid): """ int 
SymResolverRecordModuleNoDwarf(int pid); """ @@ -645,7 +645,7 @@ def SymResolverRecordModuleNoDwarf(pid: int) -> None: c_SymResolverRecordModuleNoDwarf(c_pid) -def StackToHash(pid: int, stackList: List[int]) -> Stack: +def StackToHash(pid, stackList): """ struct Stack* StackToHash(int pid, unsigned long* stack, int nr); """ @@ -664,7 +664,7 @@ def StackToHash(pid: int, stackList: List[int]) -> Stack: return Stack.from_c_stack(c_stack.contents) -def SymResolverMapAddr(pid: int, addr: int) -> Symbol: +def SymResolverMapAddr(pid, addr): """ struct Symbol* SymResolverMapAddr(int pid, unsigned long addr); """ @@ -681,7 +681,7 @@ def SymResolverMapAddr(pid: int, addr: int) -> Symbol: return Symbol.from_c_sym(c_sym.contents) -def FreeModuleData(pid: int) -> None: +def FreeModuleData(pid): """ void FreeModuleData(int pid); """ @@ -694,7 +694,7 @@ def FreeModuleData(pid: int) -> None: c_FreeModuleData(c_pid) -def SymResolverDestroy() -> None: +def SymResolverDestroy(): """ void SymResolverDestroy(); """ diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 20776de6a05a9301ac787d816e54c528b6bd9ac1..5806a155e0a1aa6ae36e4a55da8c0dabc80c5f1a 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -119,28 +119,28 @@ class Error: LIBPERF_WARN_PCIE_BIOS_NOT_NEWEST = 1003 LIBPERF_WARN_INVALID_SMMU_BDF = 1004 -def errorno() -> int: +def errorno(): """ Obtaining error codes """ return _libkperf.Perrorno() -def error()-> str: +def error(): """ Obtaining Error Information """ return _libkperf.Perror() -def get_warn() -> int: +def get_warn(): """ Get warning codes """ return _libkperf.GetWarn() -def get_warn_msg()-> str: +def get_warn_msg(): """ Get warning message """ diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 2f0a2b5235d6e80cfbb70e0ff92b74a7fda94dd4..cce26c5ab94f3576d26f1a7eb10eccb1fd4d6f3c 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -107,12 +107,12 @@ class 
SymbolMode: RESOLVE_ELF_DWARF = 2 # Resolve elf and dwarf. All fields in Symbol will be valid. class PmuDeviceMetric: - # Pernuma metric. - # Collect ddr read bandwidth for each numa node. + # Perchannel metric. + # Collect ddr read bandwidth for each channel. # Unit: Bytes/s PMU_DDR_READ_BW = 0 - # Pernuma metric. - # Collect ddr write bandwidth for each numa node. + # Perchannel metric. + # Collect ddr write bandwidth for each channel. # Unit: Bytes/s PMU_DDR_WRITE_BW = 1 # Percore metric. @@ -127,8 +127,8 @@ class PmuDeviceMetric: # Collect L3 total reference count, including miss and hit count. # Unit: count PMU_L3_REF = 4 - # Pernuma metric. - # Collect L3 total latency for each numa node. + # Percluster metric. + # Collect L3 total latency for each cluster node. # Unit: cycles PMU_L3_LAT = 5 # Collect pcie rx bandwidth. @@ -146,6 +146,12 @@ class PmuDeviceMetric: # Collect smmu address transaction. # Unit: count PMU_SMMU_TRAN = 10 + # Pernuma metric. + # Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA = 11 + # Pernuma metric. + # Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET = 12 class PmuDeviceAttr(_libkperf.PmuDeviceAttr): """ @@ -158,7 +164,7 @@ class PmuDeviceAttr(_libkperf.PmuDeviceAttr): }; """ def __init__(self, metric, bdf=None): - super().__init__( + super(PmuDeviceAttr, self).__init__( metric=metric, bdf=bdf ) @@ -247,23 +253,23 @@ class PmuAttr(_libkperf.PmuAttr): includeNewFork: In count mode, enable it you can get the new child thread count, default is disabled. 
""" def __init__(self, - evtList: List[str] = None, - pidList: List[int] = None, - cpuList: List[int] = None, - evtAttr: List[_libkperf.CtypesEvtAttr] = None, - sampleRate: int = 0, - useFreq: bool = False, - excludeUser: bool = False, - excludeKernel: bool = False, - symbolMode: int = 0, - callStack: bool = False, - blockedSample: bool = False, - dataFilter: int = 0, - evFilter: int = 0, - minLatency: int = 0, - includeNewFork: bool = False, - branchSampleFilter: int = 0) -> None: - super().__init__( + evtList = None, + pidList = None, + cpuList = None, + evtAttr = None, + sampleRate = 0, + useFreq = False, + excludeUser = False, + excludeKernel = False, + symbolMode = 0, + callStack = False, + blockedSample = False, + dataFilter = 0, + evFilter = 0, + minLatency = 0, + includeNewFork = False, + branchSampleFilter = 0): + super(PmuAttr, self).__init__( evtList=evtList, pidList=pidList, cpuList=cpuList, @@ -320,10 +326,10 @@ class PmuTraceAttr(_libkperf.PmuTraceAttr): }; """ def __init__(self, - funcs: List[str] = None, - pidList: List[int] = None, - cpuList: List[int] = None) -> None: - super().__init__( + funcs = None, + pidList = None, + cpuList = None): + super(PmuTraceAttr, self).__init__( funcs=funcs, pidList=pidList, cpuList=cpuList @@ -335,7 +341,7 @@ class ImplPmuTraceData(_libkperf.ImplPmuTraceData): class PmuTraceData(_libkperf.PmuTraceData): pass -def open(collect_type: PmuTaskType, pmu_attr: PmuAttr) -> int: +def open(collect_type, pmu_attr): """ Initialize the collection target. On success, a task id is returned which is the unique identifier of the task. @@ -348,7 +354,7 @@ def open(collect_type: PmuTaskType, pmu_attr: PmuAttr) -> int: return _libkperf.PmuOpen(int(collect_type), pmu_attr) -def event_list(event_type: PmuEventType)-> Iterator[str]: +def event_list(event_type): """ Query all available event from system. 
:param event_type: type of event chosen by user @@ -357,7 +363,7 @@ def event_list(event_type: PmuEventType)-> Iterator[str]: return _libkperf.PmuEventList(int(event_type)) -def enable(pd: int)-> int: +def enable(pd): """ Enable counting or sampling of task . On success, 0 is returned. @@ -368,7 +374,7 @@ def enable(pd: int)-> int: return _libkperf.PmuEnable(pd) -def disable(pd: int)-> int: +def disable(pd): """ Disable counting or sampling of task . On success, 0 is returned. @@ -379,7 +385,7 @@ def disable(pd: int)-> int: return _libkperf.PmuDisable(pd) -def read(pd: int) -> PmuData: +def read(pd): """ Collect data. Pmu data are collected starting from the last PmuEnable or PmuRead. @@ -390,7 +396,7 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) -def resolvePmuDataSymbol(pmuData: PmuData) -> int: +def resolvePmuDataSymbol(pmuData): """ when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols :param: pmuData @@ -399,7 +405,7 @@ def resolvePmuDataSymbol(pmuData: PmuData) -> int: return _libkperf.ResolvePmuDataSymbol(pmuData.pointer()) -def stop(pd: int) -> None: +def stop(pd): """ stop a sampling task in asynchronous mode :param pd: task id. @@ -407,7 +413,7 @@ def stop(pd: int) -> None: return _libkperf.PmuStop(pd) -def close(pd: int) -> None: +def close(pd): """ Close task with id . After PmuClose is called, all pmu data related to the task become invalid. @@ -416,7 +422,7 @@ def close(pd: int) -> None: return _libkperf.PmuClose(pd) -def dump(pmuData: PmuData, filepath: str, dump_dwf: int) -> None: +def dump(pmuData, filepath, dump_dwf): """ /** Dump pmu data to a specific file. 
@@ -431,7 +437,7 @@ def dump(pmuData: PmuData, filepath: str, dump_dwf: int) -> None: return _libkperf.PmuDumpData(pmuData, filepath, dump_dwf) -def get_field(pmu_data: _libkperf.ImplPmuData, field_name: str, value: c_void_p) -> int: +def get_field(pmu_data, field_name, value): """ get field value of trace pointer named field_name :param pmu_data: _libkperf.ImplPmuData @@ -442,7 +448,7 @@ def get_field(pmu_data: _libkperf.ImplPmuData, field_name: str, value: c_void_p) return _libkperf.PmuGetField(pmu_data.rawData.c_pmu_data_rawData, field_name, value, sizeof(value)) -def get_field_exp(pmu_data: _libkperf.ImplPmuData, field_name: str) -> SampleRawField: +def get_field_exp(pmu_data, field_name): """ get the field detail of trace pointer event :param pmu_data: the _libkperf.ImplPmuData @@ -451,7 +457,7 @@ def get_field_exp(pmu_data: _libkperf.ImplPmuData, field_name: str) -> SampleRaw """ return _libkperf.PmuGetFieldExp(pmu_data.rawData.c_pmu_data_rawData, field_name) -def device_bdf_list(bdf_type: PmuBdfType) -> Iterator[str]: +def device_bdf_list(bdf_type): """ Query all available BDF (Bus:Device.Function) list from system. :param bdf_type: type of bdf chosen by user @@ -459,7 +465,7 @@ def device_bdf_list(bdf_type: PmuBdfType) -> Iterator[str]: """ return _libkperf.PmuDeviceBdfList(int(bdf_type)) -def device_open(device_attr: List[PmuDeviceAttr]) -> int: +def device_open(device_attr): """ A high level interface for initializing PMU events for devices, such as L3 cache, DDRC, PCIe, and SMMU, to collect metrics like bandwidth, latency, and others. 
@@ -469,7 +475,7 @@ def device_open(device_attr: List[PmuDeviceAttr]) -> int: """ return _libkperf.PmuDeviceOpen(device_attr) -def get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> PmuDeviceData: +def get_device_metric(pmu_data, device_attr): """ Get device performance metric data from pmu data :param pmu_data: raw data collected by pmu @@ -479,7 +485,7 @@ def get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> Pm return _libkperf.PmuGetDevMetric(pmu_data, device_attr) -def get_cpu_freq(core: int) -> int: +def get_cpu_freq(core): """ Get cpu frequency :param core: cpu core id @@ -488,7 +494,7 @@ def get_cpu_freq(core: int) -> int: return _libkperf.PmuGetCpuFreq(core) -def get_cluster_core(clusterId: int) -> List[int]: +def get_cluster_core(clusterId): """ Get the list of core in a cluster :param cluster: cluster id @@ -496,7 +502,7 @@ def get_cluster_core(clusterId: int) -> List[int]: """ return _libkperf.PmuGetClusterCore(clusterId) -def get_numa_core(numaId: int) -> List[int]: +def get_numa_core(numaId): """ Get the list of core in a numa node :param numaId: numa node id @@ -504,37 +510,37 @@ def get_numa_core(numaId: int) -> List[int]: """ return _libkperf.PmuGetNumaCore(numaId) -def trace_open(trace_type: PmuTraceType, pmu_trace_attr: PmuTraceAttr) -> int: +def trace_open(trace_type, pmu_trace_attr): """ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr); """ return _libkperf.PmuTraceOpen(int(trace_type), pmu_trace_attr) -def trace_enable(pd: int) -> int: +def trace_enable(pd): """ int PmuTraceEnable(int pd); """ return _libkperf.PmuTraceEnable(pd) -def trace_disable(pd: int) -> int: +def trace_disable(pd): """ int PmuTraceDisable(int pd); """ return _libkperf.PmuTraceDisable(pd) -def trace_read(pd: int) -> PmuTraceData: +def trace_read(pd): """ int PmuTraceRead(int pd, struct PmuTraceData **traceData); """ return _libkperf.PmuTraceRead(pd) -def trace_close(pd: int) -> None: +def 
trace_close(pd): """ void PmuTraceClose(int pd); """ return _libkperf.PmuTraceClose(pd) -def sys_call_func_list() -> Iterator[str]: +def sys_call_func_list(): """ get the system call function list :return: system call function list @@ -544,13 +550,13 @@ def sys_call_func_list() -> Iterator[str]: class CpuFreqDetail(_libkperf.PmuCpuFreqDetail): pass -def open_cpu_freq_sampling(period: int) -> None: +def open_cpu_freq_sampling(period): return _libkperf.PmuOpenCpuFreqSampling(period) -def close_cpu_freq_sampling() -> None: +def close_cpu_freq_sampling(): return _libkperf.PmuCloseCpuFreqSampling() -def read_cpu_freq_detail() -> CpuFreqDetail: +def read_cpu_freq_detail(): return _libkperf.PmuReadCpuFreqDetail() __all__ = [ diff --git a/python/modules/ksym/symbol.py b/python/modules/ksym/symbol.py index 0e7099e1afc9b1823fb21ffef8d72963e355d6c7..dfc34fcf29608f033fabebfae695151d57a71f40 100644 --- a/python/modules/ksym/symbol.py +++ b/python/modules/ksym/symbol.py @@ -20,16 +20,16 @@ import _libkperf class Symbol(_libkperf.Symbol): def __init__(self, - addr: int = 0, - module: str = '', - symbolName: str = '', - fileName: str = '', - lineNum: int = 0, - offset: int = 0, - codeMapEndAddr: int = 0, - codeMapAddr: int = 0, - count: int = 0) -> None: - super().__init__( + addr = 0, + module = '', + symbolName = '', + fileName = '', + lineNum = 0, + offset = 0, + codeMapEndAddr = 0, + codeMapAddr = 0, + count = 0): + super(Symbol, self).__init__( addr=addr, module=module, symbolName=symbolName, @@ -45,11 +45,11 @@ class Symbol(_libkperf.Symbol): class Stack(_libkperf.Stack): def __init__(self, - symbol: Symbol = None, - next: 'Stack' = None, - prev: 'Stack' = None, - count: int = 0) -> None: - super().__init__( + symbol = None, + next = None, + prev = None, + count = 0): + super(Stack, self).__init__( symbol=symbol.c_sym if symbol else None, next=next.c_stack if next else None, prev=prev.c_stack if prev else None, @@ -57,39 +57,39 @@ class Stack(_libkperf.Stack): ) -def 
record_kernel() -> None: +def record_kernel(): _libkperf.SymResolverRecordKernel() -def record_module(pid: int, dwarf: bool = True) -> None: +def record_module(pid, dwarf = True): if dwarf: _libkperf.SymResolverRecordModule(pid) else: _libkperf.SymResolverRecordModuleNoDwarf(pid) -def get_stack(pid: int, stacks: List[int]) -> Iterator[Stack]: +def get_stack(pid, stacks): """ Convert a callstack to an unsigned long long hashid """ return _libkperf.StackToHash(pid, stacks) -def get_symbol(pid: int, addr: int) -> Symbol: +def get_symbol(pid, addr): """ Map a specific address to a symbol """ return _libkperf.SymResolverMapAddr(pid, addr) -def free_module(pid: int) -> None: +def free_module(pid): """ free pid module data """ _libkperf.FreeModuleData(pid) -def destroy() -> None: +def destroy(): _libkperf.SymResolverDestroy() @@ -102,4 +102,4 @@ __all__ = [ 'get_symbol', 'free_module', 'destroy', -] +] \ No newline at end of file diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index bf653ce808d95364bc75264ed3ad0a2cda85915a..90c254be317d202ad78bd4a0523b726996d61d92 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -256,6 +256,28 @@ def test_get_metric_smmu_transaction(): print_dev_data_details(dev_data) kperf.close(pd) +def test_collect_hha_cross(): + dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA) + ] + pd = kperf.device_open(dev_attr) + print(kperf.error()) + assert pd != -1, f"Expected non-negative pd, but got {pd}" + kperf.enable(pd) + time.sleep(1) + kperf.disable(pd) + ori_data = kperf.read(pd) + assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" + + dev_data = kperf.get_device_metric(ori_data, dev_attr) + assert dev_data[0].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[-1].metric == 
kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA + assert dev_data[-1].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + print_dev_data_details(dev_data) + kperf.close(pd) + if __name__ == '__main__': # 提示用户使用pytest 运行测试文件 print("This is a pytest script. Run it using the 'pytest' command.") diff --git a/symbol/CMakeLists.txt b/symbol/CMakeLists.txt index aaa8988e1e967e5d72bd7c9f6f503bbbc975ab34..33a14d21b3f32b2a990b42475aed5958261bb8d0 100644 --- a/symbol/CMakeLists.txt +++ b/symbol/CMakeLists.txt @@ -8,7 +8,7 @@ set(SYMBOL_FILE_DIR ${PROJECT_TOP_DIR}/symbol) set(INCLUDE_DIR ${PROJECT_TOP_DIR}/include) set(UTIL_FILE_DIR ${PROJECT_TOP_DIR}/util) -file(GLOB SYMBOL_SRC ${SYMBOL_FILE_DIR}/*c ${SYMBOL_FILE_DIR}/*cpp ${UTIL_FILE_DIR}/pcerr.cpp) +file(GLOB SYMBOL_SRC ${SYMBOL_FILE_DIR}/*c ${SYMBOL_FILE_DIR}/*cpp ${UTIL_FILE_DIR}/pcerr.cpp ${UTIL_FILE_DIR}/common.cpp) include_directories(${UTIL_FILE_DIR}) include_directories(${SYMBOL_FILE_DIR}) diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 2f971f402e24961ef9b1fb126f6ca62621a239d3..b9f264dd7fc1f361e61cc1bb94da1603726fbbbe 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -25,6 +25,7 @@ #include "name_resolve.h" #include "pcerr.h" #include "symbol_resolve.h" +#include "common.h" using namespace KUNPENG_SYM; constexpr __u64 MAX_LINE_LENGTH = 1024; @@ -671,13 +672,11 @@ int SymbolResolve::RecordElf(const char* fileName) } this->elfMap.emplace(file, myElf); } catch (std::exception& error) { - close(fd); pcerr::New(LIBSYM_ERR_ELFIN_FOMAT_FAILED, "libsym record elf format error: " + std::string{error.what()}); elfSafeHandler.releaseLock(file); return LIBSYM_ERR_ELFIN_FOMAT_FAILED; } - - close(fd); + pcerr::New(0, "success"); elfSafeHandler.releaseLock(file); return 0; @@ -719,14 +718,12 @@ int SymbolResolve::RecordDwarf(const char* fileName) efLoader.reset(); } catch (std::exception& error) { - close(fd); dwarfSafeHandler.releaseLock((file)); pcerr::New(LIBSYM_ERR_DWARF_FORMAT_FAILED, 
"libsym record dwarf file named " + file + " format error: " + std::string{error.what()}); return LIBSYM_ERR_DWARF_FORMAT_FAILED; } - close(fd); pcerr::New(0, "success"); dwarfSafeHandler.releaseLock((file)); return 0; @@ -1137,6 +1134,12 @@ struct StackAsm* SymbolResolve::MapAsmCodeStack( { char startAddrStr[ADDR_LEN]; char endAddrStr[ADDR_LEN]; + + if (!ExistPath(moduleName)) { + pcerr::New(LIBSYM_ERR_FILE_INVALID, "file does not exist"); + return nullptr; + } + if (startAddr >= endAddr) { pcerr::New(LIBSYM_ERR_START_SMALLER_END, "libysm the end address must be greater than the start address"); return nullptr; @@ -1150,6 +1153,7 @@ struct StackAsm* SymbolResolve::MapAsmCodeStack( pcerr::New(LIBSYM_ERR_SNPRINF_OPERATE_FAILED, "libsym fails to execute snprintf"); return nullptr; } + std::string cmd = "objdump -Fld " + moduleName + " --start-address=" + std::string{startAddrStr} + " --stop-address=" + std::string{endAddrStr}; FILE* pipe = popen(cmd.c_str(), "r"); diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index d10ca39265bd6c9a9ffacb35661be5085b9c7d0f..d6b5e10dedab7c9806d175e9f129d297b22a0d47 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -37,7 +37,7 @@ TEST_F(TestMetric, GetInvalidBdfList) enum PmuBdfType bdfType = (enum PmuBdfType)5; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_EQ(bdfList, nullptr); } @@ -46,7 +46,7 @@ TEST_F(TestMetric, GetPcieBdfList) enum PmuBdfType bdfType = PMU_BDF_TYPE_PCIE; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(bdfList, nullptr); } @@ -55,7 +55,7 @@ TEST_F(TestMetric, GetSmmuBdfList) enum PmuBdfType bdfType = PMU_BDF_TYPE_SMMU; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), 
SUCCESS); ASSERT_NE(bdfList, nullptr); } @@ -63,7 +63,7 @@ TEST_F(TestMetric, GetCpuFreq) { unsigned core = 6; int64_t cpu6Freq = PmuGetCpuFreq(core); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(cpu6Freq, -1); } @@ -72,12 +72,8 @@ TEST_F(TestMetric, GetClusterIdListSuccess) unsigned clusterId = 3; unsigned* coreList = nullptr; int len = PmuGetClusterCore(clusterId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(len, -1); - for (int i = 0; i < len; ++i) { - cout << coreList[i] << " "; - } - cout << endl; } TEST_F(TestMetric, GetClusterIdListOverSize) @@ -85,7 +81,7 @@ TEST_F(TestMetric, GetClusterIdListOverSize) unsigned clusterId = 33; unsigned* coreList = nullptr; int len = PmuGetClusterCore(clusterId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_EQ(len, -1); } @@ -94,12 +90,8 @@ TEST_F(TestMetric, GetNumaIdList) unsigned numaId = 2; unsigned* coreList = nullptr; int len = PmuGetNumaCore(numaId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(len, -1); - for (int i = 0; i < len; ++i) { - cout << coreList[i] << " "; - } - cout << endl; } TEST_F(TestMetric, CollectDDRBandwidth) @@ -108,7 +100,6 @@ TEST_F(TestMetric, CollectDDRBandwidth) devAttr[0].metric = PMU_DDR_READ_BW; devAttr[1].metric = PMU_DDR_WRITE_BW; int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -133,7 +124,6 @@ TEST_F(TestMetric, CollectL3Latency) PmuDeviceAttr devAttr = {}; devAttr.metric = PMU_L3_LAT; int pd = PmuDeviceOpen(&devAttr, 1); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -212,7 +202,6 @@ TEST_F(TestMetric, CollectL3LatencyAndL3Miss) devAttr[1].metric = PMU_L3_MISS; int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -248,7 +237,6 @@ TEST_F(TestMetric, GetMetricPcieBandwidth) } int pd = 
PmuDeviceOpen(devAttr, bdfLen); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -276,7 +264,6 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) const char** bdfList = nullptr; unsigned bdfLen = 0; bdfList = PmuDeviceBdfList(PMU_BDF_TYPE_SMMU, &bdfLen); - cout << Perror() << endl; ASSERT_NE(bdfList, nullptr); PmuDeviceAttr devAttr[bdfLen] = {}; for (int i = 0; i < bdfLen; ++i) { @@ -285,7 +272,6 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) } int pd = PmuDeviceOpen(devAttr, bdfLen); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -309,4 +295,29 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); +} + +TEST_F(TestMetric, GetMetricHHACross) +{ + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = PMU_HHA_CROSS_NUMA; + devAttr[1].metric = PMU_HHA_CROSS_SOCKET; + int pd = PmuDeviceOpen(devAttr, 2); + ASSERT_NE(pd, -1); + PmuEnable(pd); + sleep(1); + PmuDisable(pd); + PmuData* oriData = nullptr; + int oriLen = PmuRead(pd, &oriData); + ASSERT_NE(oriLen, -1); + + PmuDeviceData *devData = nullptr; + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + ASSERT_EQ(devData[0].metric, PMU_HHA_CROSS_NUMA); + ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); + ASSERT_EQ(devData[len - 1].metric, PMU_HHA_CROSS_SOCKET); + ASSERT_EQ(devData[len - 1].mode, PMU_METRIC_NUMA); + DevDataFree(devData); + PmuDataFree(oriData); + PmuClose(pd); } \ No newline at end of file diff --git a/test/test_perf/test_trace_analysis.cpp b/test/test_perf/test_trace_analysis.cpp index 7062d6977b9bf80955e034e44ef798e2a34f93b6..5abec52e319946bab28fd11acca4303f631545b6 100644 --- a/test/test_perf/test_trace_analysis.cpp +++ b/test/test_perf/test_trace_analysis.cpp @@ -93,11 +93,6 @@ TEST_F(TestAnaylzeData, collect_single_trace_data_success) { EnableTracePointer(pd, 1); int len = PmuTraceRead(pd, &data); EXPECT_TRUE(data != nullptr); - for (int i = 0; i < len; i++) { - cout << "funcName: " << 
data[i].funcs << " startTs: " << data[i].startTs << " elapsedTime: " << data[i].elapsedTime - << " pid: " << data[i].pid << " tid: " << data[i].tid << " cpu: " << data[i].cpu - << " comm: " << data[i].comm << endl; - } } /** @@ -118,7 +113,7 @@ TEST_F(TestAnaylzeData, collect_sleep_trace_data_success) { ASSERT_NE(pd, -1); EnableTracePointer(pd, 1); int len = PmuTraceRead(pd, &data); - EXPECT_TRUE(data != nullptr); + ASSERT_TRUE(data != nullptr); ASSERT_LT(data[0].elapsedTime, 0.1); } diff --git a/test/test_perf/test_trace_pointer.cpp b/test/test_perf/test_trace_pointer.cpp index 6bdb60d834346a14079869be878e4a695d3d652e..430f3945e78cde205bff09ffa3fcfc8a54e32a7e 100644 --- a/test/test_perf/test_trace_pointer.cpp +++ b/test/test_perf/test_trace_pointer.cpp @@ -142,8 +142,6 @@ TEST_F(TestTraceRaw, trace_pointer_net_napi) { bool l4_hash; rt = PmuGetField(rawData, "l4_hash", &l4_hash, sizeof(l4_hash)); ASSERT_EQ(rt, SUCCESS); - printf("name=%s napi_id=%d queue_mapping=%hd ip_summed=%02X l4_hash=%d ", name, napi_id, queue_mapping, - ip_summed, l4_hash); } } @@ -163,7 +161,6 @@ TEST_F(TestTraceRaw, trace_pointer_skb_copy_datagram_iovec) { unsigned int len; rt = PmuGetField(rawData, "len", &len, sizeof(len)); ASSERT_EQ(rt, SUCCESS); - printf("skbaddr=%p len=%d", skbaddr, len); } } diff --git a/util/common.cpp b/util/common.cpp index 67c4a6669b0776c652ce1e8e422c0a5994991e79..c157a5aafdb2e09d47df6bb77a045cd4927fcdc2 100644 --- a/util/common.cpp +++ b/util/common.cpp @@ -28,12 +28,6 @@ #include "pcerr.h" #include "common.h" -bool IsValidIp(unsigned long ip) { - return (ip != PERF_CONTEXT_HV && ip != PERF_CONTEXT_KERNEL && ip != PERF_CONTEXT_USER - && ip != PERF_CONTEXT_GUEST && ip != PERF_CONTEXT_GUEST_KERNEL - && ip != PERF_CONTEXT_GUEST_USER && ip != PERF_CONTEXT_MAX); -} - std::string GetRealPath(const std::string filePath) { char resolvedPath[PATH_MAX]; diff --git a/util/common.h b/util/common.h index 
77b8a9b6c33052f98b59d040ebbf9f10d7a4844b..caa52a3fbc3db4c4a788280d780545aa64a75b58 100644 --- a/util/common.h +++ b/util/common.h @@ -31,7 +31,12 @@ const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/"; -bool IsValidIp(unsigned long ip); +inline bool IsValidIp(unsigned long ip) +{ + return (ip != PERF_CONTEXT_HV && ip != PERF_CONTEXT_KERNEL && ip != PERF_CONTEXT_USER + && ip != PERF_CONTEXT_GUEST && ip != PERF_CONTEXT_GUEST_KERNEL + && ip != PERF_CONTEXT_GUEST_USER && ip != PERF_CONTEXT_MAX); +} std::string GetRealPath(const std::string filePath); bool IsValidPath(const std::string& filePath); bool IsDirectory(const std::string& path); diff --git a/util/process_map.cpp b/util/process_map.cpp index 89938b532dd33e2494eaed26db6d5cd62eb6bd96..0745b51112fa00970095904f25d35fc5c8bf95fa 100644 --- a/util/process_map.cpp +++ b/util/process_map.cpp @@ -24,58 +24,7 @@ #include "process_map.h" using namespace std; -constexpr int COMM_SIZE = 128; constexpr int PATH_LEN = 1024; -unsigned int GetNumPid() -{ - DIR *directory = opendir("/proc"); - struct dirent *entry; - unsigned int count = 0; - if (directory == nullptr) { - perror("Error opening /proc directory"); - return -1; - } - - // Count the number of process directories (pidList) - while ((entry = readdir(directory))) { - // Check if the entry is a directory and represents a process ID - if (entry->d_type == DT_DIR && atoi(entry->d_name) != 0) { - count++; - } - } - closedir(directory); - return count; -} - -int *GetAllPids(unsigned int *count) -{ - DIR *directory; - struct dirent *entry; - int *pidList = nullptr; - directory = opendir("/proc"); - - *count = GetNumPid(); - - // Allocate memory for storing pidList - if ((*count) < SIZE_MAX / sizeof(int)) { - pidList = static_cast(malloc((*count) * sizeof(int))); - } - if (pidList == nullptr) { - perror("Memory allocation error"); - closedir(directory); - return nullptr; - } - - int 
index = 0; - while ((entry = readdir(directory))) { - if (entry->d_type == DT_DIR && atoi(entry->d_name) != 0) { - pidList[index++] = atoi(entry->d_name); - } - } - - closedir(directory); - return pidList; -} void FreeProcTopo(struct ProcTopology *procTopo) { diff --git a/util/process_map.h b/util/process_map.h index 15f0b09d1c548208d67bd7f301c9fa217c715558..e401fbe03fd7b81d855e74c13beace49975b071c 100644 --- a/util/process_map.h +++ b/util/process_map.h @@ -22,8 +22,6 @@ extern "C" { struct ProcTopology* GetProcTopology(pid_t pid); void FreeProcTopo(struct ProcTopology *procTopo); -int* GetAllPids(int* count); -unsigned int GetNumPid(); int* GetChildTid(int pid, int* numChild); #ifdef __cplusplus }