diff --git a/README.en.md b/README.en.md index 234f2cc0f154fa11903ef296e3c6f9f15bae4855..8ea06f35b15d7d0916b29ca5fef882fb93f3bcaa 100644 --- a/README.en.md +++ b/README.en.md @@ -127,82 +127,106 @@ Here are some examples: * Get pmu count for a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -int pd = PmuOpen(COUNTING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - ... +#include <iostream> + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen; the pmu descriptor <pd> is returned. + // <pd> is an identity for the current task. + int pd = PmuOpen(COUNTING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << "count=" << d->count << std::endl; + } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. 
+ PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Sample a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -// Use SAMPLING for sample task. -int pd = PmuOpen(SAMPLING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // Get an element from array. - PmuData *d = &data[i]; - // Get stack object which is a linked list. - Stack *stack = d->stack; - while (stack) { - // Get symbol object. - if (stack->symbol) { - ... +#include <iostream> + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen; the pmu descriptor <pd> is returned. + // <pd> is an identity for the current task. + // Use SAMPLING for sample task. + int pd = PmuOpen(SAMPLING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // Get an element from array. 
+ PmuData *d = &data[i]; + // Get stack object which is a linked list. + Stack *stack = d->stack; + while (stack) { + // Get symbol object. + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + } + stack = stack->next; } - stack = stack->next; } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. + PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Python examples diff --git a/README.md b/README.md index 555c062d73d10d84407df11ba1493d38f3a03b23..5b4bfea1f44ce251deb7552d1b28773a3086eada 100644 --- a/README.md +++ b/README.md @@ -107,77 +107,101 @@ Go API文档可以参考GO_API.md: - 获取进程的pmu计数 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(COUNTING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - PmuData *d = &data[i]; - ... 
+#include <iostream> +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pidList[1]; + pidList[0] = getpid(); + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(COUNTING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << "count=" << d->count << std::endl; + } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - 对进程进行采样 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(SAMPLING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // 获取数组的一个元素。 - PmuData *d = &data[i]; - // 获取调用栈对象,它是一个链表。 - Stack *stack = d->stack; - while (stack) { - // 获取符号对象。 - if (stack->symbol) { - ... 
+#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; + attr.callStack = 1; + attr.freq = 200; + attr.useFreq = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(SAMPLING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // 获取数组的一个元素。 + PmuData *d = &data[i]; + // 获取调用栈对象,它是一个链表。 + Stack *stack = d->stack; + while (stack) { + // 获取符号对象。 + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + + } + stack = stack->next; } - stack = stack->next; } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - Python 例子 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index fe89cc6beb4873e2d7c416197ec9bed3c17951e1..4ee4eee96189303faef8d0b51588efca4c5a2993 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -25,6 +25,7 @@ int pd = PmuOpen(COUNTING, &attr); # python代码示例 import time import kperf + evtList = ["cycles", "branch-misses"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -134,15 +135,38 @@ perf record -e cycles,branch-misses 设置PmuAttr的方式和Counting一样,在调用PmuOpen的时候,把任务类型设置为SAMPLING,并且设置采样频率: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include 
"pcerrc.h" + +PmuAttr attr = {0}; +char* evtList[1] = {"cycles"}; attr.freq = 1000; // 采样频率是1000HZ attr.useFreq = 1; +attr.evtList = evtList; +attr.numEvt = 1; int pd = PmuOpen(SAMPLING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("cpu=%d pid=%d tid=%d period=%ld\n", data[i].cpu, data[i].pid, data[i].tid, data[i].period); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym import time + evtList = ["branch-misses", "cycles"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -150,6 +174,16 @@ pmu_attr = kperf.PmuAttr( symbolMode=kperf.SymbolMode.RESOLVE_ELF ) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if pd == -1: + print(f"kperf pmuopen sample failed, expect err is nil, but is {kperf.error()}\n") +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"cpu {item.cpu} pid {item.pid} tid {item.tid} period {item.period}") +kperf.close(pd) ``` ```go @@ -165,6 +199,18 @@ func main() { fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("cpu=%d pid=%d tid=%d period=%v\n", o.Cpu, o.Pid, o.Tid, o.Period) + } + kperf.PmuClose(pd) } ``` @@ -197,23 +243,55 @@ perf record -e arm_spe_0/load_filter=1/ 对于libkperf,可以这样设置PmuAttr: ```c++ // c++代码示例 +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + PmuAttr attr = {0}; attr.period = 8192; // 采样周期是8192 attr.dataFilter = LOAD_FILTER; // 设置filter属性为load_filter + +int pd = PmuOpen(SPE_SAMPLING, &attr); +if ( pd == -1) { + 
printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto o = data[i]; + printf("spe base info comm=%s, pid=%d, tid=%d, coreId=%d, numaId=%d, sockedId=%d\n", o.comm, o.pid, o.tid, o.cpuTopo->coreId, o.cpuTopo->numaId, o.cpuTopo->socketId); + printf("spe ext info pa=%lu, va=%lu, event=%lu, latency=%lu\n", o.ext->pa, o.ext->va, o.ext->event, o.ext->lat); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym +import time + pmu_attr = kperf.PmuAttr( - sampleRate = 1000, - symbolMode = kperf.SymbolMode.RESOLVE_ELF, - dataFilter = kperf.SpeFilter.SPE_DATA_ALL, - evFilter = kperf.SpeEventFilter.SPE_EVENT_RETIRED, - minLatency = 0x40 + sampleRate = 8192, + dataFilter = kperf.SpeFilter.LOAD_FILTER, ) # 需要root权限才能运行 pd = kperf.open(kperf.PmuTaskType.SPE_SAMPLING, pmu_attr) + +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"spe base info comm={item.comm}, pid={item.pid}, tid={item.tid}, coreId={item.cpuTopo.coreId}, numaId={item.cpuTopo.numaId}, sockedId={item.cpuTopo.socketId}") + print(f"spe ext info pa={item.ext.pa}, va={item.ext.va}, event={item.ext.event}, latency={item.ext.lat}\n") +kperf.close(pd) ``` ```go @@ -222,12 +300,28 @@ import "libkperf/kperf" import "time" func main() { - attr := kperf.PmuAttr{MinLatency:0x40, SymbolMode: kperf.ELF, SampleRate: 1000, DataFilter: kperf.SPE_DATA_ALL, EvFilter: kperf.SPE_EVENT_RETIRED} + attr := kperf.PmuAttr{SampleRate:8192, DataFilter: kperf.LOAD_FILTER} pd, err := kperf.PmuOpen(kperf.SPE, attr) if err != nil { fmt.Printf("kperf pmuopen spe failed, expect err is nil, but is %v\n", err) return } + + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, 
but is %v\n", err) + } + + for _, o := range dataVo.GoData { + fmt.Printf("spe base info comm=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, sockedId=%v\n", o.Comm, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) + fmt.Printf("spe ext info pa=%v, va=%v, event=%v, latency=%v\n", o.SpeExt.Pa, o.SpeExt.Va, o.SpeExt.Event, o.SpeExt.Lat) + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(pd) } ``` @@ -311,13 +405,35 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("evt=%s, count=%d\n", data[i].evt, data[i].count); +} +PmuClose(pd); ``` + ```python # python代码示例 import kperf +import time + evtList = ["hisi_sccl1_ddrc0/flux_rd/"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"evt={item.evt} count={item.count}") +kperf.close(pd) ``` ```go @@ -334,6 +450,18 @@ func main() { fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("evt=%v count=%v \n", o.Evt, o.Count) + } + kperf.PmuClose(pd) } ``` @@ -350,7 +478,7 @@ evtList[0] = "hisi_sccl1_ddrc/flux_rd/"; evtList = ["hisi_sccl1_ddrc/flux_rd/"] ``` -```go +```go // go代码示例 evtList := []string{"hisi_sccl1_ddrc/flux_rd/"} ``` @@ -380,16 +508,27 @@ libkperf支持tracepoint的采集,支持的tracepoint事件可以通过perf li 可以这样设置PmuAttr: ```c++ // c++代码示例 +#include +#include "symbol.h" 
+#include "pmu.h" +#include "pcerrc.h" + char *evtList[1]; evtList[0] = "sched:sched_switch"; PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; +attr.period = 1000; int pd = PmuOpen(SAMPLING, &attr); ``` ```python # python代码示例 +import kperf +import ksym +import time +from ctypes import * + evtList = ["sched:sched_switch"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -404,7 +543,6 @@ pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) import "libkperf/kperf" import "fmt" - func main() { evtList := []string{"sched:sched_switch"} attr := kperf.PmuAttr{EvtList:evtList, SymbolMode:kperf.ELF, SampleRate: 1000} @@ -424,10 +562,24 @@ tracepoint能够获取每个事件特有的数据,比如sched:sched_switch包 libkperf提供了接口PmuGetField来获取tracepoint的数据。比如对于sched:sched_switch,可以这样调用: ```c++ // c++代码示例 -int prev_pid; -PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); -char next_comm[16]; -PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto pmuData = &data[i]; + int prev_pid; + PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); + char next_comm[16]; + PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); + printf("next_comm=%s;prev_pid=%d\n", next_comm, prev_pid); +} ``` ```python @@ -480,10 +632,10 @@ func main() { var cArray [15]C.char nextErr := v.GetField("next_comm", unsafe.Pointer(&cArray)) if nextErr != nil { - fmt.Printf("get next_comm failed err is%v ",nextErr) + fmt.Printf("get next_comm failed err is%v\n",nextErr) } else { ptr := (*C.char)(unsafe.Pointer(&cArray[0])) - fmt.Printf("next_comm=%v;", C.GoString(ptr)) + fmt.Printf("next_comm=%v\n", C.GoString(ptr)) } prevPid := C.int(0) @@ -509,6 +661,11 @@ perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired 
比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + unsigned numEvt = 5; char *evtList[numEvt] = {"cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"}; // 前四个事件是一个分组 @@ -517,12 +674,27 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = numEvt; attr.evtAttr = groupId; + +int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("evt=%s, count=%d evt=%d\n", data[i].evt, data[i].count, data[i].evt); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf import time + evtList = ["cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"] # 前四个事件是一个分组 evtAttrList = [1,1,1,1,-1] @@ -535,6 +707,7 @@ pmu_data = kperf.read(pd) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) for data in pmu_data.iter: print(f"cpu {data.cpu} count {data.count} evt {data.evt}") +kperf.close(pd) ``` ```go @@ -605,6 +778,10 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) 参考代码: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[2]; // DDR读带宽 devAttr[0].metric = PMU_DDR_READ_BW; @@ -627,10 +804,10 @@ for (int i = 0; i < len / 2; ++i) { // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } for (int i = len / 2; i < len; ++i) { - cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << 
devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -639,6 +816,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) @@ -658,6 +838,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_READ_BW}, kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_WRITE_BW}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -705,6 +889,10 @@ libkperf提供了采集L3 cache平均时延的能力,用于分析访存型应 参考代码: ```c++ +#include +#include "symbol.h" +#include "pmu.h" + // c++代码示例 PmuDeviceAttr devAttr[1]; // L3平均时延 @@ -721,7 +909,7 @@ auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); // devData的长度等于cluster个数 for (int i=0;i +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[1]; // 采集PCIE设备RX的读带宽 devAttr[0].metric = PMU_PCIE_RX_MRD_BW; @@ -802,6 +1001,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_PCIE_RX_MRD_BW, bdf="16:04.0") ] @@ -817,6 +1019,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_PCIE_RX_MRD_BW, Bdf: "16:04.0"}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -848,6 +1054,10 @@ perf trace -e read,write 比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + unsigned numFunc = 2; const char *funs1 = "read"; const char 
*funs2 = "write"; @@ -862,7 +1072,7 @@ PmuTraceDisable(pd); PmuTraceData *data = nullptr; int len = PmuTraceRead(pd, &data); for(int i = 0; i < len; ++i) { - printf("funcName: %s, elspsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm) + printf("funcName: %s, elapsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm); } PmuTraceClose(pd); ``` @@ -871,6 +1081,7 @@ PmuTraceClose(pd); # python代码示例 import kperf import time + funcList = ["read","write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcList) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -930,6 +1141,10 @@ funcName: write elapsedTime: 0.00118 ms pid: 997235 tid: 997235 cpu: 110 comm: t ### 采集BRBE数据 libkperf基于sampling的能力,增加了对branch sample stack数据的采集能力,用于获取CPU的跳转记录, 通过branchSampleFilter可指定获取不同类型的分支跳转记录。 ```c++ +#include +#include "symbol.h" +#include "pmu.h" + char* evtList[1] = {"cycles"}; int* cpuList = nullptr; PmuAttr attr = {0}; @@ -980,6 +1195,7 @@ ffff88f60aa0->ffff88f60618 1 ```python import time +import ksym import kperf evtList = ["cycles"] diff --git a/docs/Go_API.md b/docs/Go_API.md index fd0a34685a6e670092dc331524ecd3c962400f7e..30a5c3f103ab3cc7a8f1d0574a0f1d8d8a591ec3 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -300,7 +300,7 @@ import "fmt" func main() { syscallList := kperf.PmuSysCallFuncList() if syscallList == nil { - fmt.Printf("sys call list is empty") + fmt.Printf("sys call list is empty\n") } else { for _, funcName := range syscallList { fmt.Printf("func name %v\n", funcName) @@ -478,12 +478,12 @@ import "fmt" func main() { err := kperf.PmuOpenCpuFreqSampling(100) if err != nil { - fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v\n", err) } freqList := 
kperf.PmuReadCpuFreqDetail() for _, v := range freqList { - fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d\n", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) } kperf.PmuCloseCpuFreqSampling() @@ -501,7 +501,7 @@ func main() { attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) if err != nil { - fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } @@ -511,24 +511,24 @@ func main() { dataVo, err := kperf.PmuRead(fd) if err != nil { - fmt.Printf("kperf pmuread failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) return } for _, o := range dataVo.GoData { if len(o.Symbols) != 0 { - fmt.Printf("expect symbol data is empty, but is not") + fmt.Printf("expect symbol data is empty, but is not\n") } } parseErr := kperf.ResolvePmuDataSymbol(dataVo) if parseErr != nil { - fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v\n", parseErr) } for _, o := range dataVo.GoData { if len(o.Symbols) == 0 { - fmt.Printf("expect symbol data is not empty, but is empty") + fmt.Printf("expect symbol data is not empty, but is empty\n") } } kperf.PmuDataFree(dataVo) diff --git a/docs/Python_API.md b/docs/Python_API.md index a0a196876d05dcb50d7d867829db8ffd902293ff..2ec007acbf1223e3a9f1f84d002e17fd1f391180 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -82,8 +82,10 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) ```python # python代码示例 -import time import kperf +import ksym +import time + evtList = ["cycles", "branch-misses"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = 
kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -196,6 +198,7 @@ get_field(pmu_data: ImplPmuData, field_name: str, value: c_void_p) ```python import kperf +import ksym import time from ctypes import * @@ -272,8 +275,9 @@ kperf.trace_open(trace_type: kperf.PmuTraceType, pmu_trace_attr: kperf.PmuTraceA ```python # python代码示例 -import time import kperf +import time + funcs = ["read", "write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcs) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -346,6 +350,8 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 ```python # python代码示例 +import kperf +import time dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) ] @@ -443,6 +449,9 @@ def close_cpu_freq_sampling() 关闭cpu频率采集 def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 ```python #python代码示例 +import kperf +import time + err = kperf.open_cpu_freq_sampling(100) if err != 0: print(f"error number: {kperf.errorno()} error message: {kperf.error()}") @@ -459,6 +468,9 @@ kperf.close_cpu_freq_sampling() def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 ```python #python代码示例 +import kperf +import time + event_name = "cycles" pmu_attr = kperf.PmuAttr( evtList=[event_name],