diff --git a/README.en.md b/README.en.md index cae1f6105dcab06479858af8b7f29ee3b3f1b10b..234f2cc0f154fa11903ef296e3c6f9f15bae4855 100644 --- a/README.en.md +++ b/README.en.md @@ -124,7 +124,7 @@ All pmu functions are accomplished by the following interfaces: Here are some examples: -* Get pmu count for a process. +* Get pmu count for a process ```C++ int pidList[1]; @@ -205,8 +205,7 @@ PmuDataFree(data); PmuClose(pd); ``` -Python examples: - +* Python examples ```python import time from collections import defaultdict @@ -237,8 +236,7 @@ def Counting(): kperf.close(pd) ``` -Go example - +* Go example ```go import "libkperf/kperf" import "fmt" @@ -290,7 +288,7 @@ python example.py ``` * **For Go example Code:** - You can directly go to the go/src/libkperf/libkperf_test directory. + You can directly go to the go/src/libkperf_test directory. ```shell go test -v # run all diff --git a/README.md b/README.md index e0441bc9b69e92e399910b95e741ed4adf83bcec..555c062d73d10d84407df11ba1493d38f3a03b23 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,7 @@ Go API文档可以参考GO_API.md: 以下是一些示例: -- 获取进程的pmu计数。 - +- 获取进程的pmu计数 ```C++ int pidList[1]; pidList[0] = pid; @@ -181,7 +180,7 @@ PmuDataFree(data); PmuClose(pd); ``` -Python 例子: +- Python 例子 ```python import time from collections import defaultdict @@ -213,7 +212,8 @@ def Counting(): kperf.close(pd) ``` -Go 例子 + +- Go 例子 ```go import "libkperf/kperf" import "fmt" @@ -264,7 +264,7 @@ python example.py ``` * **针对Go示例代码:** -可以直接跳转到 go/src/libkperf/libkperf_test目录下 +可以直接跳转到 go/src/libkperf_test目录下 ```shell go test -v # 全部运行 go test -v -test.run TestCount #指定运行的用例 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index d0d2f13898a5c9d7abdd41615f10a839596d8ae7..f8efa2988121cf82e7008cd1b249787e7ebf8a77 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -297,7 +297,7 @@ c --> d(......) 
Symbol的字段信息受PmuAttr影响: - PmuAttr.callStack会决定Stack是完整的调用栈,还是只有一层调用栈(即Stack链表只有一个元素)。 - PmuAttr.symbolMode如果等于NO_SYMBOL_RESOLVE,那么PmuData的stack是空指针。 -- PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息。 +- PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息(注:kernel的fileName为'[kernel]')。 - PmuAttr.symbolMode如果等于RESOLVE_ELF_DWARF,那么Symbol的所有信息都有效。 ### 采集uncore事件 @@ -600,7 +600,7 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) 注意,该功能是针对Counting模式,因为Sampling和SPE Sampling本身就会采集子线程的数据。 ### 采集DDRC带宽 -鲲鹏上提供了DDRC的pmu设备,用于采集DDR的性能数据,比如带宽等。libkperf提供了API,用于获取每个numa的DDR带宽数据。 +鲲鹏上提供了DDRC的pmu设备,用于采集DDR的性能数据,比如带宽等。libkperf提供了API,用于获取每个channel的DDR带宽数据。 参考代码: ```c++ @@ -620,15 +620,17 @@ PmuData *oriData = nullptr; int oriLen = PmuRead(pd, &oriData); PmuDeviceData *devData = nullptr; auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); -// 对于4个numa的服务器,devData的长度为8.前4个是读带宽,后4个是写带宽。 -for (int i=0;i<4;++i) { - // numaId表示数据对应的numa节点。 +// devData的长度为2 * n (总通道数)。前n个是读带宽,后n个是写带宽。 +for (int i = 0; i < len / 2; ++i) { + // socketId表示数据对应的socket节点。 + // ddrNumaId表示数据对应的numa节点。 + // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(" << devData[i].numaId << "): " << devData[i].count/1024/1024 << "M/s\n"; + cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } -for (int i=4;i<8;++i) { - cout << "write bandwidth(" << devData[i].numaId << "): " << devData[i].count/1024/1024 << "M/s\n"; +for (int i = len / 2; i < len; ++i) { + cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -649,9 +651,9 @@ ori_data = kperf.read(pd) dev_data 
= kperf.get_device_metric(ori_data, dev_attr) for data in dev_data.iter: if data.metric == kperf.PmuDeviceMetric.PMU_DDR_READ_BW: - print(f"read bandwidth({data.numaId}): {data.count/1024/1024} M/s") + print(f"read bandwidth(Socket: {data.socketId} Numa: {data.ddrNumaId} Channel: {data.channelId}): {data.count/1024/1024} M/s") if data.metric == kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW: - print(f"write bandwidth({data.numaId}): {data.count/1024/1024} M/s") + print(f"write bandwidth(Socket: {data.socketId} Numa: {data.ddrNumaId} Channel: {data.channelId}): {data.count/1024/1024} M/s") ``` ```go @@ -665,10 +667,10 @@ dataVo, _ := kperf.PmuRead(fd) deivceDataVo, _ := kperf.PmuGetDevMetric(dataVo, deviceAttrs) for _, v := range deivceDataVo.GoDeviceData { if v.Metric == kperf.PMU_DDR_READ_BW { - fmt.Printf("read bandwidth(%v): %v M/s\n", v.NumaId, v.Count/1024/1024) + fmt.Printf("read bandwidth(Socket: %v Numa: %v Channel: %v): %v M/s\n", v.SocketId, v.DdrNumaId, v.ChannelId, v.Count/1024/1024) } if v.Metric == kperf.PMU_DDR_WRITE_BW { - fmt.Printf("write bandwidth(%v): %v M/s\n", v.NumaId, v.Count/1024/1024) + fmt.Printf("write bandwidth(Socket: %v Numa: %v Channel: %v): %v M/s\n", v.SocketId, v.DdrNumaId, v.ChannelId, v.Count/1024/1024) } } kperf.DevDataFree(deivceDataVo) @@ -678,14 +680,23 @@ kperf.PmuClose(fd) 执行上述代码,输出的结果类似如下: ``` -read bandwidth(0): 17.32 M/s -read bandwidth(1): 5.43 M/s -read bandwidth(2): 2.83 M/s -read bandwidth(3): 4.09 M/s -write bandwidth(0): 4.35 M/s -write bandwidth(1): 2.29 M/s -write bandwidth(2): 0.84 M/s -write bandwidth(3): 0.97 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 0): 6.08 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 1): 5.66 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 2): 6.23 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 3): 5.30 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 4): 4.21 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 5): 4.06 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 6): 3.99 M/s +read 
bandwidth(Socket: 0 Numa: 1 Channel: 7): 3.89 M/s +... +write bandwidth(Socket: 1 Numa: 2 Channel: 1): 1.49 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 2): 1.44 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 3): 1.39 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 4): 1.22 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 4): 1.44 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 5): 1.43 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 6): 1.40 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 7): 1.38 M/s ``` ### 采集L3 cache的时延 diff --git a/docs/Go_API.md b/docs/Go_API.md index bb533438b3fd80345d8fd3613df8ce0338e694a1..fd0a34685a6e670092dc331524ecd3c962400f7e 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -335,12 +335,12 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * type PmuDeviceAttr struct: * Metric: 指定需要采集的指标 - * PMU_DDR_READ_BW 采集每个numa的ddrc的读带宽,单位:Bytes - * PMU_DDR_WRITE_BW 采集每个numa的ddrc的写带宽,单位:Bytes + * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes + * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns @@ -370,14 +370,20 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat * []PmuDeviceAttr: 指定需要聚合的指标参数 * typ PmuDeviceDataVo struct: * GoDeviceData []PmuDeviceData +* type DdrDataStructure struct { + ChannelId uint32 ddr数据的channel编号 + DdrNumaId uint32 ddr数据的numa编号 + SocketId uint32 ddr数据的socket编号 + } * type PmuDeviceData struct: * Metric C.enum_PmuDeviceMetric 采集的指标 * Count float64 指标的计数值 - * Mode C.enum_PmuMetricMode 指标的采集类型,按core、按numa还是按bdf号 + * Mode C.enum_PmuMetricMode 指标的采集类型,按core、按numa、按channel还是按bdf号 * CoreId uint32 数据的core编号 * NumaId 
uint32 数据的numa编号 * ClusterId uint32 簇ID * Bdf string 数据的bdf编号 + * DdrDataStructure ddr相关的统计数据 ### kperf.DevDataFree @@ -395,7 +401,7 @@ import "libkperf/kperf" import "fmt" func main() { - clusterId := uint(1) + clusterId := uint(1) coreList, err := kperf.PmuGetClusterCore(clusterId) if err != nil { fmt.Printf("kperf PmuGetClusterCore failed, expect err is nil, but is %v\n", err) @@ -419,7 +425,7 @@ import "libkperf/kperf" import "fmt" func main() { - nodeId := uint(0) + nodeId := uint(0) coreList, err := kperf.PmuGetNumaCore(nodeId) if err != nil { fmt.Printf("kperf PmuGetNumaCore failed, expect err is nil, but is %v\n", err) @@ -432,18 +438,19 @@ func main() { ``` -### kperf.PmuGetCpuFreq +### kperf.PmuGetCpuFreq + func PmuGetCpuFreq(core uint) (int64, error) 查询当前系统指定core的实时CPU频率 * core cpu coreId -* 返回值为int64, 时当前cpu core的实时频率,出现错误频率为-1,且error不为空 +* 返回值为int64, 为当前cpu core的实时频率,出现错误频率为-1,且error不为空 ```go import "libkperf/kperf" import "fmt" func main() { - coreId := uint(0) + coreId := uint(0) freq, err := kperf.PmuGetCpuFreq(coreId) if err != nil { fmt.Printf("kperf PmuGetCpuFreq failed, expect err is nil, but is %v\n", err) @@ -451,4 +458,80 @@ func main() { } fmt.Printf("coreId %v freq is %v\n", coreId, freq) } +``` + +### kperf.PmuOpenCpuFreqSampling + +func PmuOpenCpuFreqSampling(period uint) (error) 开启cpu频率采集 + +### kperf.PmuCloseCpuFreqSampling + +func PmuCloseCpuFreqSampling() 关闭cpu频率采集 + +### kperf.PmuReadCpuFreqDetail + +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + err := kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for _, v := range freqList { + fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} +``` + +### 
kperf.ResolvePmuDataSymbol + +func ResolvePmuDataSymbol(dataVo PmuDataVo) error 当SymbolMode不设置或者设置为0时,可通过该接口解析PmuRead返回的PmuData数据中的符号 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + return + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v", err) + return + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + fmt.Printf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + fmt.Printf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} ``` \ No newline at end of file diff --git a/docs/Python_API.md b/docs/Python_API.md index 9ef8703cca4d15cb185349cef44485aad9dccae4..a0a196876d05dcb50d7d867829db8ffd902293ff 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -330,8 +330,8 @@ for func_name in kperf.sys_call_func_list(): kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指标的能力 * class PmuDeviceAttr: * metric: 指定需要采集的指标 - * PMU_DDR_READ_BW 采集每个numa的ddrc的读带宽,单位:Bytes - * PMU_DDR_WRITE_BW 采集每个numa的ddrc的写带宽,单位:Bytes + * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes + * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count @@ -365,14 +365,19 @@ kperf.get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) 对 * len: 数据长度 * 
iter: 返回iterator[ImplPmuDeviceData] * free: 释放当前PmuDeviceData +* class DdrDataStructure: + * channelId: ddr数据的channel编号 + * ddrNumaId: ddr数据的numa编号 + * socketId: ddr数据的socket编号 * class ImplPmuDeviceData: * metric: 采集的指标 * count:指标的计数值 - * mode: 指标的采集类型,按core、按numa还是按bdf号 + * mode: 指标的采集类型,按core、按numa、按channel还是按bdf号 * union: * coreId: 数据的core编号 * numaId: 数据的numa编号 * bdf: 数据的bdf编号 + * DdrDataStructure: ddr相关的统计数据 ### kperf.device_bdf_list @@ -423,4 +428,55 @@ kperf.get_numa_core(numaId: int): 查询指定numaId下对应的core列表 # python代码示例 numaId = 1 numa_cores = kperf.get_numa_core(numaId) +``` + +### kperf.open_cpu_freq_sampling + +def open_cpu_freq_sampling(period: int) 开启cpu频率采集 + +### kperf.close_cpu_freq_sampling + +def close_cpu_freq_sampling() 关闭cpu频率采集 + +### kperf.read_cpu_freq_detail + +def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```python +#python代码示例 +err = kperf.open_cpu_freq_sampling(100) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +dataList = kperf.read_cpu_freq_detail() +for item in dataList.iter: + print(f"cpuId={item.cpuId} minFreq={item.minFreq} maxFreq={item.maxFreq} avgFreq={item.avgFreq}") + +kperf.close_cpu_freq_sampling() +``` + +### kperf.resolvePmuDataSymbol + +def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 +```python +#python代码示例 +event_name = "cycles" +pmu_attr = kperf.PmuAttr( + evtList=[event_name], + sampleRate=1000, + callStack=True, + useFreq=True, +) +fd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if fd == -1: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.enable(fd) +time.sleep(1) +kperf.disable(fd) +pmu_data = kperf.read(fd) +err = kperf.resolvePmuDataSymbol(pmu_data) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.close(fd) ``` \ No newline at end of file diff --git 
a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 2b5958ca39adc2fd88a5f2a6e7eb43c12fd4a631..b21f30460c6e99fe4f02d2517a0c9ac7e851c8cc 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -37,6 +37,9 @@ struct MetricDataExt { unsigned coreId; unsigned clusterId; char* bdf; + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; }; void SetPeriod(struct PmuAttr* attr, unsigned period) { @@ -122,6 +125,11 @@ void IPmuGetMetricDataExt(struct PmuDeviceData* deviceData, struct MetricDataExt case PMU_METRIC_CLUSTER: metricData->clusterId = deviceData->clusterId; break; + case PMU_METRIC_CHANNEL: + metricData->channelId = deviceData->channelId; + metricData->ddrNumaId = deviceData->ddrNumaId; + metricData->socketId = deviceData->socketId; + break; } } @@ -291,6 +299,7 @@ var ( PMU_METRIC_NUMA C.enum_PmuMetricMode = C.PMU_METRIC_NUMA PMU_METRIC_CLUSTER C.enum_PmuMetricMode = C.PMU_METRIC_CLUSTER PMU_METRIC_BDF C.enum_PmuMetricMode = C.PMU_METRIC_BDF + PMU_METRIC_CHANNEL C.enum_PmuMetricMode = C.PMU_METRIC_CHANNEL ) var fdModeMap map[int]C.enum_PmuTaskType = make(map[int]C.enum_PmuTaskType) @@ -396,6 +405,12 @@ type PmuDeviceAttr struct { Bdf string } +type DdrDataStructure struct { + ChannelId uint32 + DdrNumaId uint32 + SocketId uint32 +} + type PmuDeviceData struct { Metric C.enum_PmuDeviceMetric // The metric value. The meaning of value depends on metric type. 
@@ -406,6 +421,7 @@ type PmuDeviceData struct { NumaId uint32 // for pernuma metric ClusterId uint32 // for percluster metric Bdf string // for perpcie metric + DdrDataStructure // for perchannel metric } type PmuDeviceDataVo struct { @@ -413,6 +429,13 @@ type PmuDeviceDataVo struct { cDeviceData *C.struct_PmuDeviceData } +type PmuCpuFreqDetail struct { + CpuId int // core id + MinFreq uint64 // minimum frequency of core + MaxFreq uint64 // maximum frequency of core + AvgFreq uint64 // average frequency of core +} + // Initialize the collection target // On success, a task id is returned which is the unique identity for the task // On error, -1 is returned @@ -705,6 +728,31 @@ func PmuDumpData(dataVo PmuDataVo, filePath string, dumpDwf bool) error { } return nil } + +// When symbol mode is NO_SYMBOL_RESOLVE, call this function after PmuRead to resolve the symbols in PmuData +// param dataVo the data returned by PmuRead +// return nil when the symbols were resolved successfully, otherwise an error describing the failure +func ResolvePmuDataSymbol(dataVo PmuDataVo) error { + err := C.ResolvePmuDataSymbol(dataVo.cData) + if int(err) != 0 { + return errors.New(C.GoString(C.Perror())) + } + dataLen := len(dataVo.GoData) + ptr := unsafe.Pointer(dataVo.cData) + slice := reflect.SliceHeader { + Data: uintptr(ptr), + Len: dataLen, + Cap: dataLen, + } + cPmuDatas := *(*[]C.struct_PmuData)(unsafe.Pointer(&slice)) + for i := 0; i < dataLen; i++ { + dataObj := cPmuDatas[i] + if dataObj.stack != nil { + dataVo.GoData[i].appendSymbols(dataObj) + } + } + return nil +} // Initialize the trace collection target // On success, a trace collect task id is returned which is the unique identity for the task @@ -983,6 +1031,9 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat goDeviceList[i].NumaId = uint32(metricDataExt.numaId) goDeviceList[i].ClusterId = uint32(metricDataExt.clusterId) goDeviceList[i].Bdf = C.GoString(metricDataExt.bdf) + goDeviceList[i].ChannelId = 
uint32(metricDataExt.channelId) + goDeviceList[i].DdrNumaId = uint32(metricDataExt.ddrNumaId) + goDeviceList[i].SocketId = uint32(metricDataExt.socketId) } res.GoDeviceData = goDeviceList res.cDeviceData = metricData @@ -1059,6 +1110,54 @@ func PmuGetCpuFreq(core uint) (int64, error) { return int64(freq), nil } + +// open cpu core frequency sampling +// param period sampling period, in ms +// return nil on success, otherwise an error describing the failure +func PmuOpenCpuFreqSampling(period uint) (error) { + c_period := C.uint32_t(period) + ret := C.PmuOpenCpuFreqSampling(c_period) + if int(ret) == -1 { + return errors.New(C.GoString(C.Perror())) + } + return nil +} + +// close cpu freq sampling +func PmuCloseCpuFreqSampling() { + C.PmuCloseCpuFreqSampling() +} + +// get the maximum frequency, minimum frequency, and average frequency of each core +// takes no parameters; the number of entries is reported by the underlying C call +// return a slice of PmuCpuFreqDetail, or nil when the C call reports zero entries +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) { + cpuNum := C.uint32_t(0) + cpuFreqList := C.PmuReadCpuFreqDetail(&cpuNum) + + if (uint32(cpuNum) == 0) { + return nil + } + + ptr := unsafe.Pointer(cpuFreqList) + slice := reflect.SliceHeader{ + Data: uintptr(ptr), + Len: int(cpuNum), + Cap: int(cpuNum), + } + + cCpuFreqList := *(*[]C.struct_PmuCpuFreqDetail)(unsafe.Pointer(&slice)) + goCpuFreqList := make([]PmuCpuFreqDetail, int(cpuNum)) + + for i, v := range cCpuFreqList { + goCpuFreqList[i].CpuId = int(v.cpuId) + goCpuFreqList[i].MinFreq = uint64(v.minFreq) + goCpuFreqList[i].MaxFreq = uint64(v.maxFreq) + goCpuFreqList[i].AvgFreq = uint64(v.avgFreq) + } + return goCpuFreqList +} + func transferCPmuDataToGoData(cPmuData *C.struct_PmuData, dataLen int, fd int) []PmuData { ptr := unsafe.Pointer(cPmuData) slice := reflect.SliceHeader { diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index e64ea6a1687dff432c07ae8cad59b60e031200c9..d06683a8fa91a8e376f034d23b211ced03b4063a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -273,3 +273,53 @@ func 
TestPmuGetNumaCore(t *testing.T) { t.Logf("coreId has:%v", v) } } + +func TestPmuGetCpuFreqDetail(t *testing.T) { + err := kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + t.Fatalf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for _, v := range freqList { + t.Logf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} + +func TestResolvePmuDataSymbol(t *testing.T) { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + t.Fatalf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + t.Fatalf("kperf pmuread failed, expect err is nil, but is %v", err) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + t.Fatalf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + t.Fatalf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + t.Fatalf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} \ No newline at end of file diff --git a/include/pcerrc.h b/include/pcerrc.h index 767a48bb3a971692c47b252fbb940cecc27b17ef..909ae4e9d8529d50677965d2f09e08a85f1432df 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -106,7 +106,10 @@ extern "C" { #define LIBPERF_ERR_CPUFREQ_NOT_CONFIG 1062 #define LIBPERF_ERR_CLUSTER_ID_OVERSIZE 1063 #define LIBPERF_ERR_INVALID_PMU_BDF_TYPE 1064 -#define LIBPERF_ERR_NOT_SUPPORT_METRIC 1065 +#define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 +#define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 +#define 
LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD 1067 +#define LIBPERF_ERR_PMU_DATA_NO_FOUND 1068 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index c8bef10badb7012480bb988c838859dd25e907d6..7d4390c572fd35158adf426b88333fa24d576bca 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -265,6 +265,13 @@ struct PmuTraceData { const char *comm; // process command }; +struct PmuCpuFreqDetail { + int cpuId; // cpu core id + uint64_t minFreq; // minimum frequency of core + uint64_t maxFreq; // maximum frequency of core + uint64_t avgFreq; // average frequency of core +}; + /** * @brief * Initialize the collection target. @@ -345,6 +352,14 @@ void PmuStop(int pd); */ int PmuRead(int pd, struct PmuData** pmuData); +/** +* @brief +* When symbol mode is NO_SYMBOL_RESOLVE, call this function after PmuRead to resolve the symbols in PmuData. +* @param pmuData the data returned by PmuRead +* @return 0 if the symbols were resolved successfully, otherwise an error code +*/ +int ResolvePmuDataSymbol(struct PmuData* pmuData); + /** * @brief * Append data list to another data list <*toData>. 
@@ -463,7 +478,8 @@ enum PmuMetricMode { PMU_METRIC_CORE, PMU_METRIC_NUMA, PMU_METRIC_CLUSTER, - PMU_METRIC_BDF + PMU_METRIC_BDF, + PMU_METRIC_CHANNEL }; /** @@ -502,6 +518,12 @@ struct PmuDeviceData { unsigned clusterId; // for perpcie metric char *bdf; + // for perchannel metric of ddr + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; @@ -618,6 +640,25 @@ const char** PmuSysCallFuncList(unsigned *numFunc); */ int64_t PmuGetCpuFreq(unsigned core); +/** + * @brief get the maximum frequency, minimum frequency, and average frequency of each core + * @param cpuNum output parameter that receives the number of entries in the returned array + * @return pointer to an array of PmuCpuFreqDetail containing *cpuNum entries + */ +struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); + +/** + * @brief open cpu core frequency sampling + * @param period sampling period, in ms + * @return 0 on success, -1 on failure + */ +int PmuOpenCpuFreqSampling(unsigned period); + +/** + * @brief close cpu freq sampling + */ +void PmuCloseCpuFreqSampling(); + #pragma GCC visibility pop #ifdef __cplusplus } diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt index c68bfe0d51db80c280cebbf74fb3b5efa980511b..4af6e76f9cdfc75afd4cdf8acaa2e4cd5b45ce64 100644 --- a/pmu/CMakeLists.txt +++ b/pmu/CMakeLists.txt @@ -31,8 +31,11 @@ include_directories(${SYMBOL_FILE_DIR}) include_directories(${PMU_DECODER_DIR}) ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf") target_link_libraries(kperf numa sym) target_compile_options(kperf PRIVATE -fPIC) install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) file(GLOB HEADER_FILES ${PROJECT_TOP_DIR}/include/*.h) install(FILES ${HEADER_FILES} DESTINATION ${CMAKE_INSTALL_PREFIX}/include) diff --git a/pmu/cpu_freq.cpp b/pmu/cpu_freq.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..6f5a031cff1bb67c7b4c442f0880b74c7d82f8f3 --- /dev/null +++ b/pmu/cpu_freq.cpp @@ -0,0 +1,171 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Li + * Create: 2025-05-07 + * Description: sample cpu freq. + ******************************************************************************/ +#include "cpu_freq.h" +#include "pmu.h" +#include "pcerr.h" + +using namespace pcerr; + +CpuFreqManager* CpuFreqManager::instance = nullptr; +std::mutex CpuFreqManager::singleMutex; +std::mutex CpuFreqManager::initMutex; +std::vector CpuFreqManager::freqDetailList; +bool CpuFreqManager::hasInit = false; + +PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum) { + auto& ds = CpuFreqManager::GetCpuFreqDetail(); + *cpuNum = ds.size(); + return ds.data(); +} + +int PmuOpenCpuFreqSampling(unsigned period) { + return CpuFreqManager::GetInstance()->InitCpuFreqSampling(period); +} + +void PmuCloseCpuFreqSampling() { + CpuFreqManager::Clear(); +} + +void CpuFreqManager::Clear() { + std::lock_guard lock(singleMutex); + if (instance == nullptr) { + return; + } + delete instance; + instance = nullptr; +} + +CpuFreqManager* CpuFreqManager::GetInstance() { + if (instance == nullptr) { + std::lock_guard lock(singleMutex); + if(instance == nullptr) { + instance = new CpuFreqManager(); + } + } + return instance; +} + +int CpuFreqManager::CheckCpuFreqIsExist() { + 
for(int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + int64_t freq = PmuGetCpuFreq(cpuId); + if (freq == -1 ) { + return -1; + } + } + return 0; +} + +int CpuFreqManager::CheckSleepPeriod(unsigned period) { + if (period == 0 || period > 10000) { + New(LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD, "invalid period, the period must be less than 10000ms and greater than 0ms"); + return LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD; + } + return SUCCESS; +} + +int CpuFreqManager::InitCpuFreqSampling(unsigned period) { + this->isEnable = true; + if (hasInit) { + return 0; + } + std::lock_guard lock(initMutex); + + if (CheckSleepPeriod(period) != 0) { + return -1; + } + + if (CheckCpuFreqIsExist() != 0) { + return -1; + } + + this->sleepPeriod = period * 1000; + this->cpuFreqThread = std::thread([this]() { + while (!isEnd) { + if (!isEnable) { + continue; + } + std::lock_guard lock(mapMutex); + for (int cpu = 0; cpu < MAX_CPU_NUM; cpu++) { + int64_t freq = PmuGetCpuFreq(cpu); + if (freq == -1) { + continue; + } + if (this->freqListMap.find(cpu) != this->freqListMap.end()) { + this->freqListMap[cpu].push_back(freq); + } else { + std::vector freqList = {freq}; + this->freqListMap.insert({cpu, freqList}); + } + } + usleep(this->sleepPeriod); + } + }); + hasInit = true; + return 0; +} + +void CpuFreqManager::CalFreqDetail() { + isEnable = false; + std::lock_guard lock(mapMutex); + + if(!this->freqListMap.empty()) { + uint64_t maxFreq, minFreq, sumFreq; + for (int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + std::vector freqList; + minFreq = 0; + maxFreq = 0; + sumFreq = 0; + if (this->freqListMap.find(cpuId) != this->freqListMap.end()) { + minFreq = UINT64_MAX; + freqList = freqListMap[cpuId]; + } + for (const auto& curFreq: freqList) { + minFreq = minFreq > curFreq ? curFreq : minFreq; + maxFreq = maxFreq > curFreq ? 
maxFreq : curFreq; + sumFreq += curFreq; + } + uint64_t avgFreq = sumFreq / freqList.size(); + PmuCpuFreqDetail detail = {.cpuId=cpuId, .minFreq=minFreq, .maxFreq=maxFreq, .avgFreq=avgFreq}; + freqDetailList.push_back(detail); + } + freqListMap.clear(); + } else { + GetCurFreqDetail(); + } + + isEnable = true; +} + +void CpuFreqManager::GetCurFreqDetail() { + for(int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + uint64_t freq = PmuGetCpuFreq(cpuId); + if (freq == -1) { + freq = 0; + } + PmuCpuFreqDetail detail = {.cpuId=cpuId, .minFreq=freq, .maxFreq=freq, .avgFreq=freq}; + freqDetailList.push_back(detail); + } +} + +std::vector& CpuFreqManager::GetCpuFreqDetail() { + std::lock_guard lock(initMutex); + freqDetailList.clear(); + if (!hasInit) { + CpuFreqManager::GetCurFreqDetail(); + } else { + CpuFreqManager::GetInstance()->CalFreqDetail(); + } + return freqDetailList; +} \ No newline at end of file diff --git a/pmu/cpu_freq.h b/pmu/cpu_freq.h new file mode 100644 index 0000000000000000000000000000000000000000..0e23e44e0505befb8bbbe33bffb4d0c9beb3ceb7 --- /dev/null +++ b/pmu/cpu_freq.h @@ -0,0 +1,66 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Li + * Create: 2025-05-07 + * Description: sample cpu freq. 
+ ******************************************************************************/ +#ifndef LIBKPERF_CPU_FREQ_H +#define LIBKPERF_CPU_FREQ_H + +#include +#include +#include +#include +#include + +#include "cpu_map.h" + +class CpuFreqManager { +public: + CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(100) {}; + ~CpuFreqManager() { + std::lock_guard lock(initMutex); + if(!hasInit) { + return; + } + isEnable = false; + isEnd = true; + cpuFreqThread.join(); + hasInit = false; + } + static void Clear(); + static CpuFreqManager* GetInstance(); + static std::vector& GetCpuFreqDetail(); + static void GetCurFreqDetail(); + + int InitCpuFreqSampling(unsigned period); + void CalFreqDetail(); + +private: + static CpuFreqManager* instance; + static std::mutex singleMutex; + static std::mutex initMutex; + static std::vector freqDetailList; + static bool hasInit; + + std::mutex mapMutex; + std::thread cpuFreqThread; + volatile bool isEnable; + volatile bool isEnd; + unsigned int sleepPeriod; + std::map> freqListMap; + + int CheckCpuFreqIsExist(); + static int CheckSleepPeriod(unsigned period); +}; + + +#endif //LIBKPERF_CPU_FREQ_H \ No newline at end of file diff --git a/pmu/evt.cpp b/pmu/evt.cpp index db27e0cc402fe4848cb13a7c7a42f53e9a85c575..4d3b9346f2910b3682d3e56d53de71ea5883f71a 100644 --- a/pmu/evt.cpp +++ b/pmu/evt.cpp @@ -113,10 +113,17 @@ __u64 KUNPENG_PMU::ReadOnce(__u64 *head) : "memory"); break; case HEAD_SIZE::HEAD_SIZE_EIGHT: +#ifdef IS_X86 + asm volatile("mov %0, %1" + : "=r"(*(__u64 __attribute__((__may_alias__)) *)pointerUnion.charHead) + : "Q"(*head) + : "memory"); +#else asm volatile("ldar %0, %1" : "=r"(*(__u64 __attribute__((__may_alias__)) *)pointerUnion.charHead) : "Q"(*head) : "memory"); +#endif break; default: break; diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index 2af3e2f44451fc93e716e6f4e8326bd2c3c7c908..bee7fa8dc5e13b0c475e79007b580a6111771e5f 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -97,10 +97,10 @@ int 
KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrIsMainPid()) { continue; } + if (err == LIBPERF_ERR_INVALID_EVENT) { if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", PMU Hardware or event type doesn't support branch stack sampling"); @@ -108,6 +108,11 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrGetEvtName() + ", " + std::string{strerror(errno)}); } } + + if (err == UNKNOWN_ERROR) { + pcerr::SetCustomErr(err, std::string{strerror(errno)}); + } + return err; } fdList.insert(perfEvt->GetFd()); diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index a6417f7b32f0cfdc26d8cf92d95ee7d80ff167f7..bf801b0637a7497eb49c7387d654cc565b51cf58 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -126,8 +126,13 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou attr.disabled = 0; this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } else { - if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE) { +#ifdef IS_X86 + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) { this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#else + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#endif } else { this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } diff --git a/pmu/pfm/core.cpp b/pmu/pfm/core.cpp index 35fd1a79f403bad935fbcf3d77a7c128a9c9a66c..c177fb37e00cc549a720b5a25dedc9ec68a9f995 100644 --- a/pmu/pfm/core.cpp +++ b/pmu/pfm/core.cpp @@ -255,6 +255,33 @@ namespace HARDWARE_EVENT { KUNPENG_PMU::COMMON::BUS_CYCLES } }; + + PMU_PAIR REF_CYCLES = { + KUNPENG_PMU::COMMON::REF_CYCLES, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_REF_CPU_CYCLES, + KUNPENG_PMU::COMMON::REF_CYCLES + } + }; + + PMU_PAIR BRANCHES = { + 
KUNPENG_PMU::COMMON::BRANCHES, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + KUNPENG_PMU::COMMON::BRANCHES + } + }; + + PMU_PAIR BRANCH_INSTRUCTIONS = { + KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS + } + }; } // namespace hardware event namespace HW_CACHE_EVENT { @@ -312,6 +339,26 @@ namespace HW_CACHE_EVENT { } }; + + PMU_PAIR LLC_STORE_MISSES = { + KUNPENG_PMU::COMMON::LLC_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10102, + KUNPENG_PMU::COMMON::LLC_STORE_MISSES + } + }; + + PMU_PAIR LLC_STORES = { + KUNPENG_PMU::COMMON::LLC_STORES, + { + PERF_TYPE_HW_CACHE, + 0x102, + KUNPENG_PMU::COMMON::LLC_STORES + } + }; + + PMU_PAIR BRANCH_LOAD_MISSES = { KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES, { @@ -348,6 +395,24 @@ namespace HW_CACHE_EVENT { } }; + PMU_PAIR DTLB_STORE_MISSES = { + KUNPENG_PMU::COMMON::DTLB_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10103, + KUNPENG_PMU::COMMON::DTLB_STORE_MISSES + } + }; + + PMU_PAIR DTLB_STORES = { + KUNPENG_PMU::COMMON::DTLB_STORES, + { + PERF_TYPE_HW_CACHE, + 0x103, + KUNPENG_PMU::COMMON::DTLB_STORES + } + }; + PMU_PAIR ITLB_LOAD_MISSES = { KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES, { @@ -366,6 +431,42 @@ namespace HW_CACHE_EVENT { } }; + PMU_PAIR NODE_LOAD_MISSES = { + KUNPENG_PMU::COMMON::NODE_LOAD_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10006, + KUNPENG_PMU::COMMON::NODE_LOAD_MISSES + } + }; + + PMU_PAIR NODE_LOADS = { + KUNPENG_PMU::COMMON::NODE_LOADS, + { + PERF_TYPE_HW_CACHE, + 0x6, + KUNPENG_PMU::COMMON::NODE_LOADS + } + }; + + PMU_PAIR NODE_STORE_MISSES = { + KUNPENG_PMU::COMMON::NODE_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10106, + KUNPENG_PMU::COMMON::NODE_STORE_MISSES + } + }; + + PMU_PAIR NODE_STORES = { + KUNPENG_PMU::COMMON::NODE_STORES, + { + PERF_TYPE_HW_CACHE, + 0x106, + KUNPENG_PMU::COMMON::NODE_STORES + } + }; + PMU_PAIR L1_DCACHE_STORE_MISSES = { KUNPENG_PMU::COMMON::L1_DCACHE_STORE_MISSES, { 
@@ -872,12 +973,62 @@ const std::unordered_map HIP_E_CORE_PMU_MA SOFTWARE_EVENT::TASK_CLOCK, }; +const std::unordered_map HIP_X86_CORE_PMU_MAP{ + HARDWARE_EVENT::BRANCH_MISSES, + HARDWARE_EVENT::CACHE_MISSES, + HARDWARE_EVENT::CACHE_REFERENCES, + HARDWARE_EVENT::CPU_CYCLES, + HARDWARE_EVENT::CYCLES, + HARDWARE_EVENT::INSTRUCTIONS, + HARDWARE_EVENT::BUS_CYCLES, + HARDWARE_EVENT::REF_CYCLES, + HARDWARE_EVENT::BRANCH_INSTRUCTIONS, + HARDWARE_EVENT::BRANCHES, + + SOFTWARE_EVENT::ALIGNMENT_FAULTS, + SOFTWARE_EVENT::BPF_OUTPUT, + SOFTWARE_EVENT::CONTEXT_SWITCHES, + SOFTWARE_EVENT::CS, + SOFTWARE_EVENT::CPU_CLOCK, + SOFTWARE_EVENT::CPU_MIGRATIONS, + SOFTWARE_EVENT::MIGRATIONS, + SOFTWARE_EVENT::DUMMY, + SOFTWARE_EVENT::EMULATION_FAULTS, + SOFTWARE_EVENT::MAJOR_FAULTS, + SOFTWARE_EVENT::MINOR_FAULTS, + SOFTWARE_EVENT::PAGE_FAULTS, + SOFTWARE_EVENT::FAULTS, + SOFTWARE_EVENT::TASK_CLOCK, + + HW_CACHE_EVENT::L1_DCACHE_LOAD_MISSES, + HW_CACHE_EVENT::L1_DCACHE_LOADS, + HW_CACHE_EVENT::L1_ICACHE_LOAD_MISSES, + HW_CACHE_EVENT::L1_ICACHE_LOADS, + HW_CACHE_EVENT::LLC_LOAD_MISSES, + HW_CACHE_EVENT::LLC_LOADS, + HW_CACHE_EVENT::LLC_STORE_MISSES, + HW_CACHE_EVENT::LLC_STORES, + HW_CACHE_EVENT::BRANCH_LOAD_MISSES, + HW_CACHE_EVENT::BRANCH_LOADS, + HW_CACHE_EVENT::DTLB_LOAD_MISSES, + HW_CACHE_EVENT::DTLB_LOADS, + HW_CACHE_EVENT::DTLB_STORE_MISSES, + HW_CACHE_EVENT::DTLB_STORES, + HW_CACHE_EVENT::ITLB_LOADS, + HW_CACHE_EVENT::ITLB_LOAD_MISSES, + HW_CACHE_EVENT::NODE_LOAD_MISSES, + HW_CACHE_EVENT::NODE_LOADS, + HW_CACHE_EVENT::NODE_STORE_MISSES, + HW_CACHE_EVENT::NODE_STORES, +}; + const KUNPENG_PMU::CORE_EVT_MAP KUNPENG_PMU::CORE_EVENT_MAP = { {CHIP_TYPE::HIPA, HIP_A_CORE_PMU_MAP}, {CHIP_TYPE::HIPB, HIP_B_CORE_PMU_MAP}, {CHIP_TYPE::HIPC, HIP_C_CORE_PMU_MAP}, {CHIP_TYPE::HIPF, HIP_F_CORE_PMU_MAP}, {CHIP_TYPE::HIPE, HIP_E_CORE_PMU_MAP}, + {CHIP_TYPE::HIPX86, HIP_X86_CORE_PMU_MAP}, }; static struct PmuEvt* ConstructPmuEvtFromCore(KUNPENG_PMU::CoreConfig config, int collectType) @@ -888,7 
+1039,6 @@ static struct PmuEvt* ConstructPmuEvtFromCore(KUNPENG_PMU::CoreConfig config, in pmuEvtPtr->type = config.type; pmuEvtPtr->pmuType = KUNPENG_PMU::CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -951,7 +1101,6 @@ static struct PmuEvt* ConstructPmuEvtFromKernel(const char* pmuName, int collect pmuEvtPtr->type = type; pmuEvtPtr->pmuType = KUNPENG_PMU::CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -981,6 +1130,13 @@ std::string GetPmuDevicePath() } struct dirent *dent; while (dent = readdir(dir)) { +#ifdef IS_X86 + // look for devices like /sys/bus/event_source/devices/cpu/events + if (strcmp(dent->d_name, "cpu") == 0) { + pmuDevice = DEVICE_PATH + dent->d_name; + break; + } +#else if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..") || !strcmp(dent->d_name, "cpu")) { continue; } @@ -992,6 +1148,7 @@ std::string GetPmuDevicePath() pmuDevice = DEVICE_PATH + dent->d_name; break; } +#endif } closedir(dir); return pmuDevice; diff --git a/pmu/pfm/pfm.cpp b/pmu/pfm/pfm.cpp index 7d4aa7f72a644fcaec47ce63081c3fcffa70eca0..e516dd505933a739496378729db9f5ad5d03eab8 100644 --- a/pmu/pfm/pfm.cpp +++ b/pmu/pfm/pfm.cpp @@ -54,7 +54,6 @@ static struct PmuEvt* GetRawEvent(const char* pmuName, int collectType) pmuEvtPtr->type = PERF_TYPE_RAW; pmuEvtPtr->pmuType = CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -120,26 +119,31 @@ static bool CheckRawEvent(const char *pmuName) static int GetEventType(const char *pmuName) { - if (pmuName[0] == 'r' && CheckRawEvent(pmuName)) { - return RAW_TYPE; - } if (CheckEventInList(CORE_EVENT, pmuName)) { return CORE_TYPE; } - std::string strName(pmuName); - // Kernel trace point event name like 'block:block_bio_complete' - if (CheckEventInList(TRACE_EVENT, pmuName)) { - return TRACE_TYPE; + + if (pmuName[0] == 'r' && CheckRawEvent(pmuName)) { + return RAW_TYPE; } + std::string 
strName(pmuName); // Parse uncore event name like 'hisi_sccl3_ddrc0/flux_rd/' if (CheckEventInList(UNCORE_EVENT, pmuName)) { return UNCORE_TYPE; } +#ifdef IS_X86 + return -1; +#else + // Kernel trace point event name like 'block:block_bio_complete' + if (CheckEventInList(TRACE_EVENT, pmuName)) { + return TRACE_TYPE; + } // Parse uncore event raw name like 'hisi_sccl3_ddrc0/config=0x0/' // or smmuv3_pmcg_100020/transaction,filter_enable=1,filter_stream_id=0x7d/ if (CheckUncoreRawEvent(pmuName)) { return UNCORE_RAW_TYPE; } +#endif return -1; } @@ -175,7 +179,6 @@ struct PmuEvt* PfmGetSpeEvent( evt->config = dataFilter; evt->config1 = eventFilter; evt->config2 = minLatency; - evt->cpumask = -1; return evt; } diff --git a/pmu/pfm/pfm_name.cpp b/pmu/pfm/pfm_name.cpp index 0ef9a90d82bcef5846213c2aebd3308579c37de6..8686ecdacb7c2a567a0f05776d3ecfeea34cdb10 100644 --- a/pmu/pfm/pfm_name.cpp +++ b/pmu/pfm/pfm_name.cpp @@ -24,6 +24,9 @@ const char* KUNPENG_PMU::COMMON::BRANCH_MISSES = "branch-misses"; const char* KUNPENG_PMU::COMMON::BUS_CYCLES = "bus-cycles"; const char* KUNPENG_PMU::COMMON::CACHE_MISSES = "cache-misses"; +const char* KUNPENG_PMU::COMMON::REF_CYCLES = "ref-cycles"; +const char* KUNPENG_PMU::COMMON::BRANCHES = "branches"; +const char* KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS = "branch-instructions"; const char* KUNPENG_PMU::COMMON::CACHE_REFERENCES = "cache-references"; const char* KUNPENG_PMU::COMMON::CPU_CYCLES = "cpu-cycles"; const char* KUNPENG_PMU::COMMON::CYCLES = "cycles"; @@ -40,12 +43,20 @@ const char* KUNPENG_PMU::COMMON::IDLE_CYCLES_FRONTEND = "idle-cycles-frontend"; const char* KUNPENG_PMU::COMMON::L1_ICACHE_LOADS = "L1-icache-loads"; const char* KUNPENG_PMU::COMMON::LLC_LOAD_MISSES = "LLC-load-misses"; const char* KUNPENG_PMU::COMMON::LLC_LOADS = "LLC-loads"; +const char* KUNPENG_PMU::COMMON::LLC_STORE_MISSES = "LLC-store-misses"; +const char* KUNPENG_PMU::COMMON::LLC_STORES = "LLC-stores"; const char* KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES = 
"branch-load-misses"; const char* KUNPENG_PMU::COMMON::BRANCH_LOADS = "branch-loads"; const char* KUNPENG_PMU::COMMON::DTLB_LOAD_MISSES = "dTLB-load-misses"; const char* KUNPENG_PMU::COMMON::DTLB_LOADS = "dTLB-loads"; +const char* KUNPENG_PMU::COMMON::DTLB_STORE_MISSES = "dTLB-store-misses"; +const char* KUNPENG_PMU::COMMON::DTLB_STORES = "dTLB-stores"; const char* KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES = "iTLB-load-misses"; const char* KUNPENG_PMU::COMMON::ITLB_LOADS = "iTLB-loads"; +const char* KUNPENG_PMU::COMMON::NODE_LOAD_MISSES = "node-load-misses"; +const char* KUNPENG_PMU::COMMON::NODE_LOADS = "node-loads"; +const char* KUNPENG_PMU::COMMON::NODE_STORE_MISSES = "node-store-misses"; +const char* KUNPENG_PMU::COMMON::NODE_STORES = "node-stores"; const char* KUNPENG_PMU::COMMON::ALIGNMENT_FAULTS = "alignment-faults"; const char* KUNPENG_PMU::COMMON::BPF_OUTPUT = "bpf-output"; const char* KUNPENG_PMU::COMMON::CONTEXT_SWITCHES = "context-switches"; diff --git a/pmu/pfm/pfm_name.h b/pmu/pfm/pfm_name.h index ec528d599244bc7797db7314aff525eec0a34b0a..15635905f795a696917c2317834b775be782d168 100644 --- a/pmu/pfm/pfm_name.h +++ b/pmu/pfm/pfm_name.h @@ -25,6 +25,9 @@ extern const char* BRANCH_MISSES; extern const char* BUS_CYCLES; extern const char* CACHE_MISSES; extern const char* CACHE_REFERENCES; +extern const char* REF_CYCLES; +extern const char* BRANCHES; +extern const char* BRANCH_INSTRUCTIONS; extern const char* CPU_CYCLES; extern const char* CYCLES; extern const char* INSTRUCTIONS; @@ -40,12 +43,20 @@ extern const char* IDLE_CYCLES_FRONTEND; extern const char* L1_ICACHE_LOADS; extern const char* LLC_LOAD_MISSES; extern const char* LLC_LOADS; +extern const char* LLC_STORE_MISSES; +extern const char* LLC_STORES; extern const char* BRANCH_LOAD_MISSES; extern const char* BRANCH_LOADS; extern const char* DTLB_LOAD_MISSES; extern const char* DTLB_LOADS; +extern const char* DTLB_STORE_MISSES; +extern const char* DTLB_STORES; extern const char* ITLB_LOAD_MISSES; extern 
const char* ITLB_LOADS; +extern const char* NODE_LOAD_MISSES; +extern const char* NODE_LOADS; +extern const char* NODE_STORE_MISSES; +extern const char* NODE_STORES; // Software event extern const char* ALIGNMENT_FAULTS; extern const char* BPF_OUTPUT; diff --git a/pmu/pfm/trace.cpp b/pmu/pfm/trace.cpp index a435635284c9db5b045513b3de37c6cc1cd0a9b2..54c5bec2698e62f39899c68c380a8b958ff01d67 100644 --- a/pmu/pfm/trace.cpp +++ b/pmu/pfm/trace.cpp @@ -58,6 +58,5 @@ struct PmuEvt* GetKernelTraceEvent(const char* pmuName, int collectType) pmuEvtPtr->type = PERF_TYPE_TRACEPOINT; pmuEvtPtr->pmuType = TRACE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index a3e655b261f5013bb6d681e1878d99fe6f27a65b..6ae72cccc080d161f02ca1efda6822a15f2d97aa 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -44,23 +44,53 @@ static int GetDeviceType(const string &devName) return stoi(typeStr); } -static int GetCpuMask(const string &devName) +static std::vector GetCpuMask(const string &devName) { + std::vector maskList; string maskPath = "/sys/devices/" + devName + "/cpumask"; std::string realPath = GetRealPath(maskPath); if (!IsValidPath(realPath)) { - return -1; + return maskList; } ifstream maskIn(realPath); if (!maskIn.is_open()) { - return -1; + return maskList; } // Cpumask is a comma-separated list of integers, // but now make it simple for ddrc event. 
- string maskStr; + char maskStr[1024]; maskIn >> maskStr; - return stoi(maskStr); + if (maskStr[0] == '-') { + return maskList; + } + + char *tokStr = strtok(maskStr, ","); + while (tokStr != nullptr) { + if (strstr(tokStr, "-") != nullptr) { + int minCpu, maxCpu; + if (sscanf(tokStr, "%d-%d", &minCpu, &maxCpu) != 2) { + continue; + } + for (int i = minCpu; i <= maxCpu; i++) { + maskList.push_back(i); + } + } else { + int aloneNumber; + if (sscanf(tokStr, "%d", &aloneNumber) == 1) { + maskList.push_back(aloneNumber); + } + } + tokStr = strtok(nullptr, ","); + } + return maskList; +} + +static int64_t TransferStrToHex(const std::string& str) { + int64_t intData; + std::istringstream iss(str); + iss >> std::hex >> intData; + return intData; } static int64_t GetUncoreEventConfig(const char* pmuName) @@ -85,10 +115,25 @@ static int64_t GetUncoreEventConfig(const char* pmuName) if (findEq == string::npos) { return -1; } + +#ifdef IS_X86 + auto umaskEq = configStr.find("umask"); + if (umaskEq != string::npos) { + auto CommaEq = configStr.find(","); + if (CommaEq == string::npos) { + return -1; + } + auto lowStr = configStr.substr(findEq + 1, CommaEq - findEq); + int64_t low = TransferStrToHex(lowStr); + auto highStr = configStr.substr(umaskEq + 6, configStr.size() - umaskEq - 6); + int64_t high = TransferStrToHex(highStr); + config = (high << 8) + low; + return config; + } +#endif auto subStr = configStr.substr(findEq + 1, configStr.size() - findEq); std::istringstream iss(subStr); iss >> std::hex >> config; - return config; } @@ -103,8 +148,8 @@ int FillUncoreFields(const char* pmuName, PmuEvt *evt) return UNKNOWN_ERROR; } evt->type = devType; - int cpuMask = GetCpuMask(devName); - evt->cpumask = cpuMask; + std::vector cpuMaskList = GetCpuMask(devName); + evt->cpuMaskList = cpuMaskList; evt->name = pmuName; return SUCCESS; } diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 44414ca59ad46765d630dce91a3705eef56881db..cba1713aedba628746537d7b283d96a82248f257 100644 --- 
a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -27,7 +27,7 @@ #include "pcerr.h" #include "safe_handler.h" #include "pmu_metric.h" -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" #include "pmu.h" using namespace pcerr; @@ -174,6 +174,12 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * New(LIBPERF_ERR_INVALID_TASK_TYPE); return LIBPERF_ERR_INVALID_TASK_TYPE; } +#ifdef IS_X86 + if (collectType != COUNTING && collectType != SAMPLING) { + New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUNTING mode and SMAPLING mode"); + return LIBPERF_ERR_INVALID_TASK_TYPE; + } +#endif if ((collectType == COUNTING) && attr->evtList == nullptr) { New(LIBPERF_ERR_INVALID_EVTLIST, "Counting mode requires a non-null event list."); return LIBPERF_ERR_INVALID_EVTLIST; @@ -205,7 +211,6 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * New(LIBPERF_ERR_INVALID_GROUP_SPE); return LIBPERF_ERR_INVALID_GROUP_SPE; } - return SUCCESS; } @@ -764,6 +769,11 @@ int PmuRead(int pd, struct PmuData** pmuData) } } +int ResolvePmuDataSymbol(struct PmuData* pmuData) +{ + return PmuList::GetInstance()->ResolvePmuDataSymbol(pmuData); +} + void PmuClose(int pd) { SetWarn(SUCCESS); @@ -789,10 +799,12 @@ static struct PmuEvt* GetPmuEvent(const char* pmuName, int collectType) static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt) { - if (pmuEvt->cpumask >= 0) { - taskParam->numCpu = 1; - taskParam->cpuList = new int[1]; - taskParam->cpuList[0] = pmuEvt->cpumask; + if (!pmuEvt->cpuMaskList.empty()) { + taskParam->numCpu = pmuEvt->cpuMaskList.size(); + taskParam->cpuList = new int[pmuEvt->cpuMaskList.size()]; + for(int i = 0; i < pmuEvt->cpuMaskList.size(); i++) { + taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; + } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { // For counting with pid list for system wide, open fd with cpu -1 and 
specific pid. taskParam->numCpu = 1; @@ -818,6 +830,24 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } +static bool PerfEventSupported(__u64 type, __u64 config) +{ + perf_event_attr attr{}; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = type; + attr.config = config; + attr.disabled = 1; + attr.inherit = 1; + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); + if (fd < 0) { + return false; + } + close(fd); + return true; +} + static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); @@ -839,7 +869,26 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } else { pmuEvt = GetPmuEvent(evtName, collectType); if (pmuEvt == nullptr) { + if (Perrorno() != SUCCESS) { + return nullptr; + } +#ifdef IS_X86 + New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName) + ";x86 just supports core event and raw event"); +#else New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName)); +#endif + return nullptr; + } + + if (!PerfEventSupported(pmuEvt->type, pmuEvt->config)) { + int err = MapErrno(errno); + if (err == LIBPERF_ERR_NO_PERMISSION) { + New(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event.Swtich to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); + } else if(err == UNKNOWN_ERROR) { + New(UNKNOWN_ERROR, std::string{strerror(errno)}); + } else { + New(err); + } return nullptr; } } @@ -953,22 +1002,33 @@ int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpD } int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *value, uint32_t vSize) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); 
+ return LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86; +#else if (rawData == nullptr) { New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return LIBPERF_ERR_INVALID_FIELD_ARGS; } - return PointerPasser::ParsePointer(rawData->data, fieldName, value, vSize); + return TraceParser::ParseTraceData(rawData->data, fieldName, value, vSize); +#endif } struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return nullptr; +#else if (rawData == nullptr) { New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return nullptr; } - SampleRawField *rt = PointerPasser::GetSampleRawField(rawData->data, fieldName); + + SampleRawField *rt = TraceParser::GetSampleRawField(rawData->data, fieldName); if (rt) { New(SUCCESS); } return rt; +#endif } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 84553779793a2cc469f2188c110026180bfb5ec9..6fc6a72ddf6a71ba78ead681c08b6b86bc2906dd 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -38,7 +38,7 @@ struct PmuEvt { int pmuType; // if pmu is CORE/UNCORE/SPE and etc (to be implemented) int collectType; std::string name; // string name of this pmu event - int cpumask; // a representative CPU number for each socket (package) in the motherboard. + std::vector cpuMaskList; // representative CPU number list for each socket (package) in the motherboard. 
unsigned excludeUser : 1; // don't count user unsigned excludeKernel : 1; // don't count kernel unsigned callStack : 1; // collect complete call stack diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index c85bc529006ca3d35ba478d46b232a5d49f7f4e3..547ccb2dbb735453701a8c15f66cb2d3c089a6d6 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -37,7 +37,11 @@ static const string EVENT_DIR = "/events/"; static std::mutex pmuEventListMtx; +#ifdef IS_X86 +static vector supportDevPrefixs = {"uncore_iio", "uncore_imc", "cpu"}; +#else static vector supportDevPrefixs = {"hisi", "smmuv3", "hns3", "armv8"}; +#endif static vector uncoreEventList; static vector traceEventList; @@ -57,6 +61,12 @@ static void GetEventName(const string& devName, vector& eventList) continue; } string fileName(entry->d_name); +#ifdef IS_X86 + // Included in x86 .scale .unit files not for events + if (fileName.find('.') != string::npos) { + continue; + } +#endif auto eventName = devName; eventName += SLASH + fileName; eventName += SLASH; @@ -91,24 +101,6 @@ static void GetTraceSubFolder(const std::string& traceFolder, const string& devN closedir(dir); } -static bool PerfEventSupported(__u64 type, __u64 config) -{ - perf_event_attr attr{}; - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(struct perf_event_attr); - attr.type = type; - attr.config = config; - attr.disabled = 1; - attr.inherit = 1; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); - if (fd < 0) { - return false; - } - close(fd); - return true; -} - const char** QueryCoreEvent(unsigned *numEvt) { if (!coreEventList.empty()) { @@ -118,9 +110,6 @@ const char** QueryCoreEvent(unsigned *numEvt) auto coreEventMap = KUNPENG_PMU::CORE_EVENT_MAP.at(GetCpuType()); for (auto& pair : coreEventMap) { auto eventName = pair.first; - if (!PerfEventSupported(pair.second.type, pair.second.config)) { - continue; - 
} char* eventNameCopy = new char[eventName.length() + 1]; strcpy(eventNameCopy, eventName.c_str()); coreEventList.emplace_back(eventNameCopy); @@ -182,6 +171,10 @@ const char** QueryUncoreEvent(unsigned *numEvt) const char** QueryTraceEvent(unsigned *numEvt) { +#ifdef IS_X86 + *numEvt = 0; + return nullptr; +#else if (!traceEventList.empty()) { *numEvt = traceEventList.size(); return traceEventList.data(); @@ -189,6 +182,11 @@ const char** QueryTraceEvent(unsigned *numEvt) struct dirent *entry; const string &traceFolder = GetTraceEventDir(); if (traceFolder.empty()) { + if (errno == EACCES) { + New(LIBPERF_ERR_NO_PERMISSION, "no permission to access '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } else { + New(LIBPERF_ERR_INVALID_EVENT, "can't find '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } return traceEventList.data(); } DIR *dir = opendir(traceFolder.c_str()); @@ -207,6 +205,7 @@ const char** QueryTraceEvent(unsigned *numEvt) closedir(dir); *numEvt = traceEventList.size(); return traceEventList.data(); +#endif } const char** QueryAllEvent(unsigned *numEvt) { @@ -267,7 +266,6 @@ const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt) New(LIBPERF_ERR_QUERY_EVENT_LIST_FAILED, "Query event failed."); return nullptr; } - New(SUCCESS); return eventList; } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 360b875aa9139435e791d57218412460b136e921..d0a47c4f39b9eac93ee4ca36042614c539336387 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -23,7 +23,7 @@ #include "pcerr.h" #include "util_time.h" #include "log.h" -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" #include "pmu_event_list.h" #include "pmu_list.h" #include "pfm_event.h" @@ -413,12 +413,14 @@ namespace KUNPENG_PMU { } auto findToData = userDataList.find(*toData); - if (findFromData == userDataList.end()) { + if (findToData == userDataList.end()) { return LIBPERF_ERR_INVALID_PMU_DATA; } // For non-null target 
data list, append source list to end of target vector. auto& dataVec = findToData->second.data; + auto& ipsVec = findToData->second.sampleIps; dataVec.insert(dataVec.end(), findFromData->second.data.begin(), findFromData->second.data.end()); + ipsVec.insert(ipsVec.end(), findFromData->second.sampleIps.begin(), findFromData->second.sampleIps.end()); len = dataVec.size(); if (*toData != dataVec.data()) { @@ -457,7 +459,7 @@ namespace KUNPENG_PMU { EraseParentEventMap(pd); SymResolverDestroy(); PmuEventListFree(); - PointerPasser::FreeRawFieldMap(); + TraceParser::FreeRawFieldMap(); } int PmuList::NewPd() @@ -625,9 +627,6 @@ namespace KUNPENG_PMU { void PmuList::FillStackInfo(EventData& eventData) { auto symMode = symModeList[eventData.pd]; - if (symMode == NO_SYMBOL_RESOLVE) { - return; - } // Parse dwarf and elf info of each pid and get stack trace for each pmu data. for (size_t i = 0; i < eventData.data.size(); ++i) { auto& pmuData = eventData.data[i]; @@ -636,13 +635,48 @@ namespace KUNPENG_PMU { SymResolverRecordModuleNoDwarf(pmuData.pid); } else if (symMode == RESOLVE_ELF_DWARF) { SymResolverRecordModule(pmuData.pid); + } else if (symMode == NO_SYMBOL_RESOLVE) { + SymResolverRecordModule(pmuData.pid); + continue; } else { continue; } + if (pmuData.stack == nullptr) { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } + //Exceptions generated by the symbol interface are not directly exposed and are processed as warnings. 
+ int err = Perrorno(); + if (err < LIBPERF_ERR_NO_AVAIL_PD && err >= LIBSYM_ERR_BASE) { + pcerr::SetWarn(err, Perror()); + New(SUCCESS); + } + } + + int PmuList::ResolvePmuDataSymbol(struct PmuData* iPmuData) + { + if (iPmuData == nullptr) { + New(LIBPERF_ERR_INVALID_PMU_DATA, "ipmuData is nullptr"); + return LIBPERF_ERR_INVALID_PMU_DATA; + } + auto userData = userDataList.find(iPmuData); + if (userData == userDataList.end()) { + New(LIBPERF_ERR_PMU_DATA_NO_FOUND, "ipmuData isn't in userDataList"); + return LIBPERF_ERR_PMU_DATA_NO_FOUND; + } + + auto& eventData = userDataList[iPmuData]; + auto symMode = symModeList[eventData.pd]; + for (size_t i = 0; i < eventData.data.size(); ++i) { + auto& pmuData = eventData.data[i]; + auto& ipsData = eventData.sampleIps[i]; + if (pmuData.stack == nullptr) { + pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); + } + } + New(SUCCESS); + return SUCCESS; } void PmuList::AggregateData(const std::vector& evData, std::vector& newEvData) @@ -757,7 +791,7 @@ namespace KUNPENG_PMU { for (auto pd: findData->second.data) { if (pd.rawData != nullptr) { - PointerPasser::FreePointerData(pd.rawData->data); + TraceParser::FreeTraceData(pd.rawData->data); free(pd.rawData); pd.rawData = nullptr; } @@ -1025,9 +1059,6 @@ namespace KUNPENG_PMU { int PmuList::InitSymbolRecordModule(const unsigned pd, PmuTaskAttr* taskParam) { SymbolMode symMode = GetSymbolMode(pd); - if (symMode == NO_SYMBOL_RESOLVE) { - return SUCCESS; - } if (taskParam->pmuEvt->collectType == COUNTING) { return SUCCESS; @@ -1053,7 +1084,7 @@ namespace KUNPENG_PMU { } } - if (this->symModeList[pd] == RESOLVE_ELF_DWARF) { + if (this->symModeList[pd] == RESOLVE_ELF_DWARF || this->symModeList[pd] == NO_SYMBOL_RESOLVE) { for (const auto& pid: pidList) { int rt = SymResolverRecordModule(pid); if (rt != SUCCESS) { diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index b44ff55e50927db9c8ca322078a2fc5fd90ee4ee..523e2ad96b18c34f34fa99665897aba9054ff56c 100644 --- 
a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -76,6 +76,7 @@ public: void StoreSplitData(unsigned pd, std::pair& previousEventList, std::unordered_map& eventSplitMap); bool IsAllPidExit(const unsigned pd); + int ResolvePmuDataSymbol(struct PmuData* iPmuData); private: using ProcPtr = std::shared_ptr; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 980632b9b6a59ea25fb4d30ee1e994c498d3fb9b..6998c5f6fc7c88392da89d1cb9e4e8d0f6cb70ac 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ using namespace std; using namespace pcerr; +using IdxMap = unordered_map>; static unsigned maxCpuNum = 0; static vector coreArray; @@ -86,8 +88,9 @@ namespace KUNPENG_PMU { }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW, PMU_L3_LAT}; + set pernumaMetric = {PMU_L3_LAT}; set perClusterMetric = {PMU_L3_LAT}; + set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, PMU_PCIE_RX_MWR_BW, PMU_PCIE_TX_MRD_BW, @@ -105,7 +108,7 @@ namespace KUNPENG_PMU { if (it != MetricToString.end()) { return it->second; } - return ""; + return ""; } using PMU_METRIC_PAIR = std::pair; @@ -890,6 +893,11 @@ namespace KUNPENG_PMU { unsigned numaId; unsigned clusterId; char *bdf; + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; @@ -949,7 +957,7 @@ namespace KUNPENG_PMU { switch(metric) { case PMU_DDR_READ_BW: case PMU_DDR_WRITE_BW: - return PMU_METRIC_NUMA; + return PMU_METRIC_CHANNEL; case PMU_L3_LAT: return PMU_METRIC_CLUSTER; case PMU_L3_TRAFFIC: @@ -1077,6 +1085,108 @@ namespace KUNPENG_PMU { return SUCCESS; } + static IdxMap DDRC_CHANNEL_MAP_HIPA = { + {1, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {3, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, + {5, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {7, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, + }; + static IdxMap 
DDRC_CHANNEL_MAP_HIPB = { + {3, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {1, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + {11, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {9, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + }; + + static unordered_map DDRC_CHANNEL_MAP = { + {HIPA, DDRC_CHANNEL_MAP_HIPA}, + {HIPB, DDRC_CHANNEL_MAP_HIPB}, + }; + + static int ParseDDRIdx(const string &devName, const string prefix) + { + size_t ddrcPos = devName.find(prefix); + size_t channelIndex = ddrcPos + prefix.length(); + string ddrcIndexStr = devName.substr(channelIndex); + size_t separatorPos = ddrcIndexStr.find("_"); + int ddrcIndex = separatorPos != string::npos ? stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + return ddrcIndex; + } + + static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + { + string devName; + string evtName; + if (!GetDeviceName(evt, devName, evtName)) { + return false; + } + // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 + int ddrcIndex = ParseDDRIdx(devName, "ddrc"); + int scclIndex = ParseDDRIdx(devName, "sccl"); + + CHIP_TYPE chipType = GetCpuType(); //get channel index + if (DDRC_CHANNEL_MAP.find(chipType) == DDRC_CHANNEL_MAP.end()) { + return false; + } + + auto &ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; + auto ddrIdxMap = ddrcChannelList.find(scclIndex); + if (ddrIdxMap != ddrcChannelList.end()) { + auto channelIdx = ddrIdxMap->second.find(ddrcIndex); + if (channelIdx != ddrIdxMap->second.end()) { + channelId = channelIdx->second; + return true; + } + } + return false; + } + + struct channelKeyHash { + size_t operator()(const tuple& key) const { + auto socketIdHash = hash{}(get<0>(key)); + auto channelIdHash = hash{}(get<1>(key)); + auto ddrNumaIdHash = hash{}(get<2>(key)); + return socketIdHash ^ (channelIdHash << 1) ^ (ddrNumaIdHash << 2); + } + }; + + int AggregateByChannel(const PmuDeviceMetric metric, const vector &rawData, vector &devData) + { + unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; 
//Key: socketId, channelId, ddrNumaId + for (auto &data : rawData) { + unsigned channelId; + if (!getChannelId(data.evtName, data.ddrNumaId, channelId)) { + continue; + } + auto ddrDatakey = make_tuple(data.socketId, channelId, data.ddrNumaId); + auto findData = devDataByChannel.find(ddrDatakey); + if (findData == devDataByChannel.end()) { + PmuDeviceData outData; + outData.metric = data.metric; + outData.count = data.count; + outData.mode = GetMetricMode(data.metric); + outData.channelId = channelId; + outData.ddrNumaId = data.ddrNumaId; + outData.socketId = data.socketId; + devDataByChannel[ddrDatakey] = outData; + } else { + findData->second.count += data.count; + } + } + + vector, PmuDeviceData>> sortedVec(devDataByChannel.begin(), devDataByChannel.end()); + sort(sortedVec.begin(), sortedVec.end(), []( + const pair, PmuDeviceData>& a, + const pair, PmuDeviceData>& b) { + return a.first < b.first; + }); + for (auto &data : sortedVec) { + devData.push_back(data.second); + } + + return SUCCESS; + } + int PcieBWAggregate(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { const auto& deviceConfig = GetDeviceMtricConfig(); @@ -1160,8 +1270,8 @@ namespace KUNPENG_PMU { {PMU_L3_TRAFFIC, L3Bw}, {PMU_L3_LAT, L3Lat}}; map aggregateMap = { - {PMU_DDR_READ_BW, AggregateByNuma}, - {PMU_DDR_WRITE_BW, AggregateByNuma}, + {PMU_DDR_READ_BW, AggregateByChannel}, + {PMU_DDR_WRITE_BW, AggregateByChannel}, {PMU_L3_LAT, AggregateByCluster}, {PMU_PCIE_RX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_RX_MWR_BW, PcieBWAggregate}, @@ -1270,6 +1380,10 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } + if (perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + devData.ddrNumaId = pmuData[i].cpuTopo->numaId; + devData.socketId = pmuData[i].cpuTopo->socketId; + } if (IsBdfMetric(devAttr.metric)) { devData.bdf = devAttr.bdf; } @@ -1285,6 +1399,10 @@ using 
namespace KUNPENG_PMU; const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return nullptr; +#else try { lock_guard lg(pmuBdfListMtx); SetWarn(SUCCESS); @@ -1316,6 +1434,7 @@ const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf) New(UNKNOWN_ERROR, ex.what()); return nullptr; } +#endif } static void PmuBdfListFreeSingle(vector &bdfList) @@ -1338,6 +1457,10 @@ void PmuDeviceBdfListFree() int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); try { if (CheckPmuDeviceAttr(attr, len) != SUCCESS) { @@ -1371,6 +1494,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } static int CheckPmuDeviceVar(struct PmuData *pmuData, unsigned len, @@ -1398,6 +1522,10 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, struct PmuDeviceAttr *attr, unsigned attrLen, struct PmuDeviceData **data) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); try { if (CheckPmuDeviceVar(pmuData, len, attr, attrLen) != SUCCESS) { @@ -1450,6 +1578,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } void DevDataFree(struct PmuDeviceData *data) @@ -1468,7 +1597,7 @@ int64_t PmuGetCpuFreq(unsigned core) cpuPath << SYS_CPU_INFO_PATH << core << "/cpufreq/scaling_cur_freq"; if (!ExistPath(cpuPath.str())) { - New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq Or core exceed cpuNums. Not exist " + cpuPath.str()); + New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq or core exceed cpuNums. 
Not exist " + cpuPath.str()); return -1; } std::string curFreqStr = ReadFileContent(cpuPath.str()); @@ -1496,6 +1625,10 @@ static void InitializeCoreArray() int PmuGetClusterCore(unsigned clusterId, unsigned **coreList) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else try { lock_guard lg(pmuCoreListMtx); InitializeCoreArray(); @@ -1527,6 +1660,7 @@ int PmuGetClusterCore(unsigned clusterId, unsigned **coreList) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } int PmuGetNumaCore(unsigned nodeId, unsigned **coreList) diff --git a/pmu/pmu_trace_analysis.cpp b/pmu/pmu_trace_analysis.cpp index 7dbf4dace9a558f1db1b0dfb5b318ff5273c4cfc..c1c68346ecff8132c1802b1ae0ae7be76b6de07f 100644 --- a/pmu/pmu_trace_analysis.cpp +++ b/pmu/pmu_trace_analysis.cpp @@ -29,6 +29,11 @@ static vector SysCallFuncList; const char** PmuSysCallFuncList(unsigned *numFuncs) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + *numFuncs = 0; + return nullptr; +#else lock_guard lg(SysCallListMtx); SetWarn(SUCCESS); try { @@ -57,6 +62,7 @@ const char** PmuSysCallFuncList(unsigned *numFuncs) New(SUCCESS); *numFuncs = SysCallFuncList.size(); return SysCallFuncList.data(); +#endif } void PmuSysCallFuncListFree() @@ -172,6 +178,10 @@ static char **GeneratePmuAttrEvtList(const char **sysCallFuncs, const unsigned n int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); auto err = CheckTraceAttr(traceType, traceAttr); if (err != SUCCESS) { @@ -199,6 +209,7 @@ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr) } return pd; +#endif } int PmuTraceEnable(int pd) @@ -213,6 +224,10 @@ int PmuTraceDisable(int pd) int PmuTraceRead(int pd, struct PmuTraceData **pmuTraceData) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else PmuData *pmuData = nullptr; unsigned len = PmuRead(pd, 
&pmuData); if (len == -1) { @@ -246,6 +261,7 @@ int PmuTraceRead(int pd, struct PmuTraceData **pmuTraceData) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } void PmuTraceClose(int pd) diff --git a/pmu/sample_process.cpp b/pmu/sample_process.cpp index fcd1bfaf482cee26d80bd78258a97bbfef0610d6..778b62222b2a3cc5654d0034f216a2fd34bd8b35 100644 --- a/pmu/sample_process.cpp +++ b/pmu/sample_process.cpp @@ -22,6 +22,16 @@ #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) #define MB() asm volatile("dmb ish" ::: "memory") static constexpr int MAX_DATA_SIZE = 8192; +#ifdef IS_X86 +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#else #define PerfRingbufferSmpStoreRelease(p, v) \ ({ \ union { \ @@ -30,6 +40,7 @@ static constexpr int MAX_DATA_SIZE = 8192; } pointerUnion = {.val = (v)}; \ asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ }) +#endif void KUNPENG_PMU::PerfMmapConsume(PerfMmap &map) { diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 57682fb358238be31c21767a30667ad3bdcda878..cab155621d5ddf0b295aad7042d2a558b70c0ce8 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -31,7 +31,8 @@ #include "process_map.h" #include "log.h" #include "sampler.h" -#include "trace_pointer_parser.h" +#include "pfm_event.h" +#include "trace_point_parser.h" #include "common.h" using namespace std; @@ -55,7 +56,13 @@ int KUNPENG_PMU::PerfSampler::MapPerfAttr(const bool groupEnable, const int grou attr.read_format = PERF_FORMAT_ID; attr.exclude_kernel = this->evt->excludeKernel; attr.exclude_user = this->evt->excludeUser; +#ifdef IS_X86 + if (this->pid == -1) { + attr.pinned = 0; + } +#else attr.pinned = 1; +#endif attr.disabled = 1; attr.inherit = 1; attr.mmap = 1; @@ -210,22 +217,20 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } 
KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; - if (symMode != NO_SYMBOL_RESOLVE) { - // Copy ips from ring buffer and get stack info later. - if (evt->callStack == 0) { - int i = 0; - while (i < sample->nr && !IsValidIp(sample->ips[i])) { - i++; - } - if (i < sample->nr) { + // Copy ips from ring buffer and get stack info later. + if (evt->callStack == 0) { + int i = 0; + while (i < sample->nr && !IsValidIp(sample->ips[i])) { + i++; + } + if (i < sample->nr) { + ips->ips.push_back(sample->ips[i]); + } + } else { + for (int i = sample->nr - 1; i >= 0; --i) { + if (IsValidIp(sample->ips[i])) { ips->ips.push_back(sample->ips[i]); } - } else { - for (int i = sample->nr - 1; i >= 0; --i) { - if (IsValidIp(sample->ips[i])) { - ips->ips.push_back(sample->ips[i]); - } - } } } current->cpu = sample->cpu; @@ -233,7 +238,9 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( current->tid = static_cast(sample->tid); current->period = static_cast(sample->period); current->ts = static_cast(sample->time); - PointerPasser::ParserRawFormatData(current, sample, event, this->evt->name); + if (this->evt->pmuType == TRACE_TYPE) { + TraceParser::ParserRawFormatData(current, sample, event, this->evt->name); + } ParseBranchSampleData(current, sample, event, extPool); } diff --git a/pmu/spe.cpp b/pmu/spe.cpp index a075bb88451fbeac72feb6ea5bbb1129c8c3e7d1..2bc42c400bb7ba277a6da589d52262e2d5f06ea8 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -359,12 +359,22 @@ void Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData uint64_t off = dataTail % mpage->data_size; struct perf_event_header *header = (struct perf_event_header *)(ringBuf + off); - if (header->type == PERF_RECORD_MMAP && symbolMode != NO_SYMBOL_RESOLVE) { + if (header->type == PERF_RECORD_MMAP) { struct PerfRecordMmap *sample = (struct PerfRecordMmap *)header; - if (symbolMode == RESOLVE_ELF_DWARF) { - SymResolverUpdateModule(sample->tid, sample->filename, 
sample->addr); + if (symbolMode == RESOLVE_ELF_DWARF || symbolMode == NO_SYMBOL_RESOLVE) { + int ret = SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); + if (ret != SUCCESS) { + // if the module fails to be updated, a warning is recorded to overwrite the failure error code. + SetWarn(ret, Perror()); + New(SUCCESS); + } } else if (symbolMode == RESOLVE_ELF) { - SymResolverUpdateModuleNoDwarf(sample->tid, sample->filename, sample->addr); + int ret = SymResolverUpdateModuleNoDwarf(sample->tid, sample->filename, sample->addr); + if (ret != SUCCESS) { + // if the module fails to be updated, a warning is recorded to overwrite the failure error code. + SetWarn(ret, Perror()); + New(SUCCESS); + } } dataTail += header->size; continue; diff --git a/pmu/spe.h b/pmu/spe.h index aa4ba0e678d7706a9c9645520aa0b62ba839040b..f1fe64c163239737b75d1f3b392ca8d5f8124eca 100644 --- a/pmu/spe.h +++ b/pmu/spe.h @@ -28,9 +28,15 @@ #include "pmu_event.h" #include "symbol.h" +#ifdef IS_X86 +#define MB() asm volatile("mfence":::"memory") +#define RMB() asm volatile("lfence":::"memory") +#define WMB() asm volatile("sfence":::"memory") +#else #define MB() asm volatile("dsb sy") #define RMB() asm volatile("dsb ld") #define WMB() asm volatile("dsb st") +#endif #define EVENT_EXCEPTION_GEN 0x1 #define EVENT_RETIRED 0x2 diff --git a/pmu/trace_pointer_parser.cpp b/pmu/trace_point_parser.cpp similarity index 94% rename from pmu/trace_pointer_parser.cpp rename to pmu/trace_point_parser.cpp index 0e84fdaa5774ca083a0a94249346e952642be998..3e2b79b4ccfeecd053bb0fd4762ef247b7261941 100644 --- a/pmu/trace_pointer_parser.cpp +++ b/pmu/trace_point_parser.cpp @@ -13,7 +13,7 @@ * Description: Provides the capability of parsing pointer events. 
******************************************************************************/ -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" using namespace KUNPENG_PMU; using namespace pcerr; @@ -41,7 +41,7 @@ static std::string GetFormatRealPath(const std::string &evtName) { return GetRealPath(formatPath); } -bool PointerPasser::IsNeedFormat(std::ifstream &file, const std::string &evtName) { +bool TraceParser::IsNeedFormat(std::ifstream &file, const std::string &evtName) { std::string realPath; if (formatMap.find(evtName) != formatMap.end()) { realPath = formatMap.at(evtName); @@ -90,7 +90,7 @@ void ParseFormatFile(ifstream &file, const std::string &evtName) { efMap.insert({evtName, fnMap}); } -void PointerPasser::ParserRawFormatData(struct PmuData *pd, KUNPENG_PMU::PerfRawSample *sample, +void TraceParser::ParserRawFormatData(struct PmuData *pd, KUNPENG_PMU::PerfRawSample *sample, union KUNPENG_PMU::PerfEvent *event, const std::string &evtName) { ifstream file; @@ -171,7 +171,7 @@ int CheckFieldArgs(char *data, const string &fieldName, T *value, uint32_t vSize } template -int PointerPasser::ParseField(char *data, const std::string &fieldName, T *value, uint32_t vSize) { +int TraceParser::ParseField(char *data, const std::string &fieldName, T *value, uint32_t vSize) { int rt = CheckFieldArgs(data, fieldName, value, vSize); if (rt != SUCCESS) { return rt; @@ -201,12 +201,12 @@ int PointerPasser::ParseField(char *data, const std::string &fieldName, T *value return SUCCESS; } -int PointerPasser::ParsePointer(char *data, const std::string &fieldName, void *value, +int TraceParser::ParseTraceData(char *data, const std::string &fieldName, void *value, uint32_t vSize) { return ParseField(data, fieldName, value, vSize); } -void PointerPasser::FreePointerData(char *data) { +void TraceParser::FreeTraceData(char *data) { if (data == nullptr) { return; } @@ -217,7 +217,7 @@ void PointerPasser::FreePointerData(char *data) { data = nullptr; } -SampleRawField 
*PointerPasser::GetSampleRawField(char *data, const std::string &fieldName) { +SampleRawField *TraceParser::GetSampleRawField(char *data, const std::string &fieldName) { int ret = CheckFieldArgs(data, fieldName); if (ret != SUCCESS) { return nullptr; @@ -238,7 +238,7 @@ SampleRawField *PointerPasser::GetSampleRawField(char *data, const std::string & return fsrMap.at(field); } -void PointerPasser::FreeRawFieldMap() { +void TraceParser::FreeRawFieldMap() { for (auto it = fsrMap.begin(); it != fsrMap.end(); ++it) { if (!it->second) { continue; diff --git a/pmu/trace_pointer_parser.h b/pmu/trace_point_parser.h similarity index 89% rename from pmu/trace_pointer_parser.h rename to pmu/trace_point_parser.h index 701836f205e9f73a0216db4ccb1c44df0648db5a..a101d313d906904e3b9542fbe2880e9778c34ab1 100644 --- a/pmu/trace_pointer_parser.h +++ b/pmu/trace_point_parser.h @@ -12,8 +12,8 @@ * Create: 2024-07-04 * Description: Provides the capability of parsing pointer events. ******************************************************************************/ -#ifndef LIBKPERF_TRACE_POINTER_PARSER_H -#define LIBKPERF_TRACE_POINTER_PARSER_H +#ifndef LIBKPERF_TRACE_POINT_PARSER_H +#define LIBKPERF_TRACE_POINT_PARSER_H #include #include @@ -46,7 +46,7 @@ namespace KUNPENG_PMU { } }; - class PointerPasser { + class TraceParser { public: /** * @brief determine whether the event is a pointer event. @@ -68,12 +68,12 @@ namespace KUNPENG_PMU { /** * @brief the method of parsing field. */ - static int ParsePointer(char *data, const string &fieldName, void *value, uint32_t vSize); + static int ParseTraceData(char *data, const string &fieldName, void *value, uint32_t vSize); /** * @brief free the data. */ - static void FreePointerData(char *data); + static void FreeTraceData(char *data); /** * @brief get the field named fieldName of this event. 
@@ -89,4 +89,4 @@ namespace KUNPENG_PMU { } -#endif //LIBKPERF_TRACE_POINTER_PARSER_H +#endif //LIBKPERF_TRACE_POINT_PARSER_H diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index ae0847ce0c1e77219425944256ff879929964860..2cffe19b5da9c202b6b127743cda904d09f3d855 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -467,6 +467,12 @@ class PmuDeviceAttr: pmu_device_attr.__c_pmu_device_attr = c_pmu_device_attr return pmu_device_attr +class DdrDataStructure(ctypes.Structure): + _fields_ = [ + ('channelId', ctypes.c_uint), + ('ddrNumaId', ctypes.c_uint), + ('socketId', ctypes.c_uint) + ] class CtypesPmuDeviceData(ctypes.Structure): """ @@ -479,6 +485,11 @@ class CtypesPmuDeviceData(ctypes.Structure): unsigned numaId; unsigned clusterId; char *bdf; + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; """ @@ -487,7 +498,8 @@ class CtypesPmuDeviceData(ctypes.Structure): ('coreId', ctypes.c_uint), ('numaId', ctypes.c_uint), ('clusterId', ctypes.c_uint), - ('bdf', ctypes.c_char_p) + ('bdf', ctypes.c_char_p), + ('_structure', DdrDataStructure) ] _fields_ = [ @@ -521,6 +533,23 @@ class CtypesPmuDeviceData(ctypes.Structure): return self._union.bdf.decode(UTF_8) return "" + @property + def channelId(self) -> int: + if self.mode == 5 and self._union._structure.channelId: # PMU_METRIC_CHANNEL + return self._union._structure.channelId + return 0 + + @property + def ddrNumaId(self) -> int: + if self.mode == 5 and self._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL + return self._union._structure.ddrNumaId + return 0 + + @property + def socketId(self) -> int: + if self.mode == 5 and self._union._structure.socketId: # PMU_METRIC_CHANNEL + return self._union._structure.socketId + return 0 class ImplPmuDeviceData: __slots__ = ['__c_pmu_device_data'] @@ -574,6 +603,24 @@ class ImplPmuDeviceData: return self.c_pmu_device_data._union.bdf.decode(UTF_8) return "" + @property + def 
channelId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.channelId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.channelId + return 0 + + @property + def ddrNumaId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.ddrNumaId + return 0 + + @property + def socketId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.socketId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.socketId + return 0 + @classmethod def from_c_pmu_device_data(cls, c_pmu_device_data: CtypesPmuDeviceData) -> 'ImplPmuDeviceData': pmu_device_data = cls() @@ -1572,6 +1619,15 @@ def PmuRead(pd: int) -> PmuData: c_data_len = c_PmuRead(c_pd, ctypes.byref(c_data_pointer)) return PmuData(c_data_pointer, c_data_len) +def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: + """ + int ResolvePmuDataSymbol(struct PmuData* pmuData); + """ + c_ResolvePmuDataSymbol = kperf_so.ResolvePmuDataSymbol + c_ResolvePmuDataSymbol.argtypes = [ctypes.POINTER(CtypesPmuData)] + c_ResolvePmuDataSymbol.restype = ctypes.c_int + + return c_ResolvePmuDataSymbol(pmuData) def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), toData: ctypes.POINTER(ctypes.POINTER(CtypesPmuData))) -> int: @@ -1857,6 +1913,138 @@ def PmuSysCallFuncListFree() -> None: c_PmuSysCallFuncListFree() +class CtypesPmuCpuFreqDetail(ctypes.Structure): + """ + struct PmuCpuFreqDetail { + int cpuId; // cpu core id + uint64_t minFreq; // minimum frequency of core + uint64_t maxFreq; // maximum frequency of core + uint64_t avgFreq; // average frequency of core + } + """ + _fields_ = [ + ('cpuId', ctypes.c_int), + ('minFreq', ctypes.c_uint64), + ('maxFreq', ctypes.c_uint64), + ('avgFreq', ctypes.c_uint64), + ] + + def __init__(self, + cpuId: int = 0, + minFreq: int = 0, + maxFreq: int = 0, + avgFreq: int = 0, + *args:Any, 
**kw: Any) -> None: + super().__init__(*args, **kw) + self.cpuId = ctypes.c_int(cpuId) + self.minFreq = ctypes.c_uint64(minFreq) + self.maxFreq = ctypes.c_uint64(maxFreq) + self.avgFreq = ctypes.c_uint64(avgFreq) + + +class ImplPmuCpuFreqDetail: + __slots__ = ['__c_pmu_cpu_freq_detail'] + def __init__(self, + cpuId: int = 0, + minFreq: int = 0, + maxFreq: int = 0, + avgFreq: int = 0, + *args:Any, **kw: Any) -> None: + self.__c_pmu_cpu_freq_detail = CtypesPmuCpuFreqDetail( + cpuId=cpuId, + minFreq=minFreq, + maxFreq=maxFreq, + avgFreq=avgFreq + ) + + @property + def c_pmu_cpu_freq_detail(self) -> CtypesPmuCpuFreqDetail: + return self.__c_pmu_cpu_freq_detail + + @property + def cpuId(self) -> int: + return self.__c_pmu_cpu_freq_detail.cpuId + + @cpuId.setter + def cpuId(self, cpuId: int) -> None: + self.__c_pmu_cpu_freq_detail.cpuId = ctypes.c_int(cpuId) + + @property + def minFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.minFreq + + @minFreq.setter + def minFreq(self, minFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.minFreq = ctypes.c_uint64(minFreq) + + @property + def maxFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.maxFreq + + @maxFreq.setter + def maxFreq(self, maxFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.maxFreq = ctypes.c_uint64(maxFreq) + + @property + def avgFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.avgFreq + + @avgFreq.setter + def avgFreq(self, avgFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.avgFreq = ctypes.c_uint64(avgFreq) + + @classmethod + def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail: CtypesPmuCpuFreqDetail) -> 'ImplPmuCpuFreqDetail': + freq_detail = cls() + freq_detail.__c_pmu_cpu_freq_detail = c_pmu_cpu_freq_detail + return freq_detail + + +class PmuCpuFreqDetail: + __slots__ = ['__pointer', '__iter', '__len'] + + def __init__(self, pointer: ctypes.POINTER(CtypesPmuCpuFreqDetail) = None, len: int = 0) -> None: + self.__pointer = pointer + self.__len = len + self.__iter = 
(ImplPmuCpuFreqDetail.from_c_pmu_cpu_freq_detail(self.__pointer[i]) for i in range(self.__len)) + + @property + def len(self) -> int: + return self.__len + + @property + def iter(self) -> Iterator[ImplPmuCpuFreqDetail]: + return self.__iter + + +def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: + """ + struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); + """ + c_PmuGetCpuFreqDetail = kperf_so.PmuReadCpuFreqDetail + c_PmuGetCpuFreqDetail.argtypes = [] + c_PmuGetCpuFreqDetail.restype = ctypes.POINTER(CtypesPmuCpuFreqDetail) + c_cpu_len = ctypes.c_uint(0) + c_freq_detail_pointer = c_PmuGetCpuFreqDetail(ctypes.byref(c_cpu_len)) + + return PmuCpuFreqDetail(c_freq_detail_pointer, c_cpu_len.value) + +def PmuOpenCpuFreqSampling(period: int) -> None: + """ + int PmuOpenCpuFreqSampling(unsigned period); + """ + c_PmuOpenCpuFreqSampling = kperf_so.PmuOpenCpuFreqSampling + + c_period = ctypes.c_uint(period) + return c_PmuOpenCpuFreqSampling(c_period) + +def PmuCloseCpuFreqSampling() -> None: + """ + void PmuCloseCpuFreqSampling(); + """ + c_PmuCloseCpuFreqSampling = kperf_so.PmuCloseCpuFreqSampling + c_PmuCloseCpuFreqSampling() + __all__ = [ 'CtypesEvtAttr', @@ -1903,4 +2091,9 @@ __all__ = [ 'PmuTraceDataFree', 'PmuSysCallFuncList', 'PmuSysCallFuncListFree', + 'PmuOpenCpuFreqSampling', + 'PmuReadCpuFreqDetail', + 'PmuCloseCpuFreqSampling', + 'PmuCpuFreqDetail', + 'ResolvePmuDataSymbol' ] diff --git a/python/modules/_libkperf/Symbol.py b/python/modules/_libkperf/Symbol.py index 7b3242e257fdd0cea3038e078611b151b7e0b97e..705f1a317cf43e5c8cc61bc826651d81f1a2832c 100644 --- a/python/modules/_libkperf/Symbol.py +++ b/python/modules/_libkperf/Symbol.py @@ -645,7 +645,7 @@ def SymResolverRecordModuleNoDwarf(pid: int) -> None: c_SymResolverRecordModuleNoDwarf(c_pid) -def StackToHash(pid: int, stackList: List[int]) -> Iterator[Stack]: +def StackToHash(pid: int, stackList: List[int]) -> Stack: """ struct Stack* StackToHash(int pid, unsigned long* stack, int nr); """ @@ 
-659,10 +659,9 @@ def StackToHash(pid: int, stackList: List[int]) -> Iterator[Stack]: c_nr = ctypes.c_int(stack_len) c_stack = c_StackToHash(c_pid, c_stack_list, c_nr) - while c_stack: - stack = Stack.from_c_stack(c_stack) - yield stack - c_stack = c_stack.contents.next + if not c_stack: + return None + return Stack.from_c_stack(c_stack.contents) def SymResolverMapAddr(pid: int, addr: int) -> Symbol: @@ -677,8 +676,9 @@ def SymResolverMapAddr(pid: int, addr: int) -> Symbol: c_addr = ctypes.c_ulong(addr) c_sym = c_SymResolverMapAddr(c_pid, c_addr) - - return Symbol.from_c_sym(c_sym) + if not c_sym: + return None + return Symbol.from_c_sym(c_sym.contents) def FreeModuleData(pid: int) -> None: diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index bc8c8e24d26b4ae14cb0a39997d343a5712f8e23..20776de6a05a9301ac787d816e54c528b6bd9ac1 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -105,7 +105,10 @@ class Error: LIBPERF_ERR_CPUFREQ_NOT_CONFIG = 1062 LIBPERF_ERR_CLUSTER_ID_OVERSIZE = 1063 LIBPERF_ERR_INVALID_PMU_BDF_TYPE = 1064 - LIBPERF_ERR_NOT_SUPPORT_METRIC = 1065 + LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 + LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 + LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD = 1067 + LIBPERF_ERR_PMU_DATA_NO_FOUND = 1068 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 975b1221fdf8565c2c34b717a2f201a8728abe1f..2f0a2b5235d6e80cfbb70e0ff92b74a7fda94dd4 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -173,6 +173,7 @@ class PmuMetricMode: PMU_METRIC_NUMA = 2 PMU_METRIC_CLUSTER = 3 PMU_METRIC_BDF = 4 + PMU_METRIC_CHANNEL = 5 class ImplPmuDeviceData(_libkperf.ImplPmuDeviceData): pass @@ -193,6 +194,12 @@ class PmuDeviceData(_libkperf.PmuDeviceData): unsigned numaId; // for perpcie metric char *bdf; + // for perchannel metric of ddr + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; """ @@ 
-383,6 +390,14 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) +def resolvePmuDataSymbol(pmuData: PmuData) -> int: + """ + when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols + :param: pmuData + :return: pmu data + """ + return _libkperf.ResolvePmuDataSymbol(pmuData.pointer()) + def stop(pd: int) -> None: """ @@ -526,6 +541,18 @@ def sys_call_func_list() -> Iterator[str]: """ return _libkperf.PmuSysCallFuncList() +class CpuFreqDetail(_libkperf.PmuCpuFreqDetail): + pass + +def open_cpu_freq_sampling(period: int) -> None: + return _libkperf.PmuOpenCpuFreqSampling(period) + +def close_cpu_freq_sampling() -> None: + return _libkperf.PmuCloseCpuFreqSampling() + +def read_cpu_freq_detail() -> CpuFreqDetail: + return _libkperf.PmuReadCpuFreqDetail() + __all__ = [ 'PmuTaskType', 'PmuEventType', @@ -572,4 +599,9 @@ __all__ = [ 'trace_close', 'sys_call_func_list', 'BranchSampleFilter', + 'CpuFreqDetail', + 'open_cpu_freq_sampling', + 'close_cpu_freq_sampling', + 'read_cpu_freq_detail', + 'resolvePmuDataSymbol' ] diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index 4a878aed0ac5166d60ab52e47a2f2d05a26b001a..bf653ce808d95364bc75264ed3ad0a2cda85915a 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -112,7 +112,8 @@ def test_get_numa_cores(): def test_collect_ddr_bandwidth(): dev_attr = [ - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW) + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) ] pd = kperf.device_open(dev_attr) print(kperf.error()) @@ -125,9 +126,10 @@ def test_collect_ddr_bandwidth(): dev_data = None dev_data = kperf.get_device_metric(ori_data, dev_attr) - assert len(dev_data) == 4 - assert dev_data[0].numaId == 0 - assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[0].count != 0 + assert dev_data[0].metric 
== kperf.PmuDeviceMetric.PMU_DDR_READ_BW + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_CHANNEL + assert dev_data[len(dev_data) - 1].metric == kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW print_dev_data_details(dev_data) kperf.close(pd) @@ -150,26 +152,6 @@ def test_collect_l3_latency(): print_dev_data_details(dev_data) kperf.close(pd) -def test_collect_l3_latency_and_ddr(): - dev_attr = [ - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_LAT), - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) - ] - pd = kperf.device_open(dev_attr) - print(kperf.error()) - assert pd != -1, f"Expected non-negative pd, but got {pd}" - kperf.enable(pd) - time.sleep(1) - kperf.disable(pd) - ori_data = kperf.read(pd) - assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" - - dev_data = kperf.get_device_metric(ori_data, dev_attr) - assert len(dev_data) == get_cluster_nums() + 4 - print_dev_data_details(dev_data) - kperf.close(pd) - - def test_collect_l3_traffic(): dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) diff --git a/symbol/CMakeLists.txt b/symbol/CMakeLists.txt index 920d59e34034f97592b4676276cbf76ce099a689..aaa8988e1e967e5d72bd7c9f6f503bbbc975ab34 100644 --- a/symbol/CMakeLists.txt +++ b/symbol/CMakeLists.txt @@ -16,6 +16,9 @@ include_directories(${INCLUDE_DIR}) message(${THIRD_PARTY}/elfin-parser/elf) ADD_LIBRARY(sym SHARED ${SYMBOL_SRC}) +ADD_LIBRARY(sym_static STATIC ${SYMBOL_SRC}) +set_target_properties(sym_static PROPERTIES OUTPUT_NAME "sym") target_link_libraries(sym elf_static dwarf_static pthread) install(TARGETS sym DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS sym_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) install(FILES ${SYMBOL_FILE_DIR}/symbol.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include) diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 46f6ad5c515400fa40297563e4a1b24f3a17db86..2f971f402e24961ef9b1fb126f6ca62621a239d3 100644 --- 
a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "name_resolve.h" #include "pcerr.h" #include "symbol_resolve.h" @@ -70,16 +71,15 @@ namespace { flag = false; } - static inline bool CheckIfFile(std::string mapline) + static inline bool CheckIfFile(const std::string& mapline) { - return (!((mapline.find(HUGEPAGE) != std::string::npos) || (mapline.find(DEV_ZERO) != std::string::npos) || - (mapline.find(ANON) != std::string::npos) || (mapline.find(STACK) != std::string::npos) || - (mapline.find(SOCKET) != std::string::npos) || (mapline.find(VSYSCALL) != std::string::npos) || - (mapline.find(HEAP) != std::string::npos) || (mapline.find(VDSO) != std::string::npos) || - (mapline.find(SYSV) != std::string::npos) || (mapline.find(VVAR) != std::string::npos)) && - (mapline.find(R_XP) != std::string::npos)) - ? true - : false; + const std::vector patterns = {HUGEPAGE, DEV_ZERO, ANON, STACK, SOCKET, VSYSCALL, HEAP ,VDSO, SYSV, VVAR}; + for (const auto& pattern :patterns) { + if (mapline.find(pattern) != std::string::npos) { + return false; + } + } + return mapline.find(R_XP) != std::string::npos; } static inline char* InitChar(int len) @@ -398,7 +398,7 @@ bool MyElf::IsExecFile() void MyElf::Emplace(unsigned long addr, const ELF_SYM& elfSym) { - this->symTab.insert({addr, elfSym}); + this->symTab.emplace(addr, elfSym); } ELF_SYM* MyElf::FindSymbol(unsigned long addr) @@ -548,15 +548,11 @@ int SymbolResolve::RecordModule(int pid, RecordModuleType recordModuleType) moduleSafeHandler.releaseLock(pid); return 0; } - char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because 
of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -588,15 +584,11 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) return SUCCESS; } // Get memory maps of pid. - char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -618,8 +610,8 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) this->RecordDwarf(item->moduleName.c_str()); } } - for (auto mod : diffModVec) { - oldModVec.push_back(mod); + for (auto& mod : diffModVec) { + oldModVec.emplace_back(mod); } pcerr::New(SUCCESS); moduleSafeHandler.releaseLock(pid); @@ -1176,11 +1168,13 @@ std::vector> SymbolResolve::FindDiffMaps( const std::vector>& newMaps) const { std::vector> diffMaps; + std::set oldStarts; + for (const auto& oldMod : oldMaps) { + oldStarts.insert(oldMod->start); + } for (auto newMod : newMaps) { - for (auto oldMod : oldMaps) { - if (newMod->start != oldMod->start) { - diffMaps.push_back(newMod); - } + if (oldStarts.find(newMod->start) == oldStarts.end()) { + diffMaps.emplace_back(newMod); } } diff --git a/test/test_perf/test_api.cpp b/test/test_perf/test_api.cpp index 6ae2eeacbcec400d483dffab4e22236ce40b2f7c..edb431123cd77a6ae315e1768de39208fdb05b9f 100644 --- a/test/test_perf/test_api.cpp +++ b/test/test_perf/test_api.cpp @@ -17,6 +17,7 @@ 
#include "util_time.h" #include "process_map.h" #include "common.h" +#include "cpu_map.h" #include "test_common.h" using namespace std; @@ -670,3 +671,19 @@ TEST_F(TestAPI, TestBrBeBadMode) { ASSERT_EQ(pd, -1); ASSERT_EQ(Perrorno(), LIBPERF_ERR_BRANCH_JUST_SUPPORT_SAMPLING); } + +TEST_F(TestAPI, TestCpuFreqSampling) { + int ret = PmuOpenCpuFreqSampling(100); + ASSERT_NE(ret, -1); + PmuCloseCpuFreqSampling(); + + unsigned cpuNum = 0; + PmuCpuFreqDetail* pDetail1 = PmuReadCpuFreqDetail(&cpuNum); + ASSERT_EQ(cpuNum, MAX_CPU_NUM); + ret = PmuOpenCpuFreqSampling(100); + ASSERT_NE(ret, -1); + sleep(2); + PmuCpuFreqDetail* pDetail2 = PmuReadCpuFreqDetail(&cpuNum); + ASSERT_EQ(cpuNum, MAX_CPU_NUM); + PmuCloseCpuFreqSampling(); +} \ No newline at end of file diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index 56bee10674152030a55ab56c04f59fd6c1ce8aa1..d10ca39265bd6c9a9ffacb35661be5085b9c7d0f 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -104,9 +104,10 @@ TEST_F(TestMetric, GetNumaIdList) TEST_F(TestMetric, CollectDDRBandwidth) { - PmuDeviceAttr devAttr = {}; - devAttr.metric = PMU_DDR_READ_BW; - int pd = PmuDeviceOpen(&devAttr, 1); + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = PMU_DDR_READ_BW; + devAttr[1].metric = PMU_DDR_WRITE_BW; + int pd = PmuDeviceOpen(devAttr, 2); cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); @@ -117,16 +118,11 @@ TEST_F(TestMetric, CollectDDRBandwidth) ASSERT_NE(oriLen, -1); PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, &devAttr, 1, &devData); - ASSERT_EQ(len, 4); - ASSERT_EQ(devData[0].numaId, 0); - ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[1].numaId, 1); - ASSERT_EQ(devData[1].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[2].numaId, 2); - ASSERT_EQ(devData[2].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[3].numaId, 3); - ASSERT_EQ(devData[3].mode, PMU_METRIC_NUMA); + auto len = PmuGetDevMetric(oriData, 
oriLen, devAttr, 2, &devData); + ASSERT_NE(devData[0].count, 0); + ASSERT_EQ(devData[0].mode, PMU_METRIC_CHANNEL); + ASSERT_EQ(devData[0].metric, PMU_DDR_READ_BW); + ASSERT_EQ(devData[len - 1].metric, PMU_DDR_WRITE_BW); DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); @@ -160,37 +156,6 @@ TEST_F(TestMetric, CollectL3Latency) PmuClose(pd); } -TEST_F(TestMetric, CollectL3LatencyAndDDR) -{ - PmuDeviceAttr devAttr[2] = {}; - devAttr[0].metric = PMU_L3_LAT; - devAttr[1].metric = PMU_DDR_WRITE_BW; - - int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; - ASSERT_NE(pd, -1); - PmuEnable(pd); - sleep(1); - PmuDisable(pd); - PmuData* oriData = nullptr; - int oriLen = PmuRead(pd, &oriData); - ASSERT_NE(oriLen, -1); - - PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); - unsigned clusterCount = GetClusterCount(); - unsigned numaCount = GetNumaNodeCount(); - ASSERT_EQ(len, clusterCount + numaCount); - ASSERT_NE(devData[0].count, 0); - ASSERT_EQ(devData[0].metric, PMU_L3_LAT); - ASSERT_EQ(devData[0].mode, PMU_METRIC_CLUSTER); - ASSERT_EQ(devData[clusterCount].metric, PMU_DDR_WRITE_BW); - ASSERT_EQ(devData[clusterCount].mode, PMU_METRIC_NUMA); - DevDataFree(devData); - PmuDataFree(oriData); - PmuClose(pd); -} - TEST_F(TestMetric, CollectL3Traffic) { PmuDeviceAttr devAttr = {}; diff --git a/util/common.cpp b/util/common.cpp index 647119ae264f1ce7f96e407444917c54cc5d204d..67c4a6669b0776c652ce1e8e422c0a5994991e79 100644 --- a/util/common.cpp +++ b/util/common.cpp @@ -155,4 +155,11 @@ std::string GetTraceEventDir() return TRACE_DEBUG_EVENT_PATH; } return ""; +} + +bool StartWith(const std::string& str, const std::string& prefix) { + if (str.size() < prefix.size()) { + return false; + } + return str.substr(0, prefix.size()) == prefix; } \ No newline at end of file diff --git a/util/common.h b/util/common.h index 4ffd0c8e4cef5f193a60733916afcae753a66175..77b8a9b6c33052f98b59d040ebbf9f10d7a4844b 100644 --- 
a/util/common.h +++ b/util/common.h @@ -18,6 +18,15 @@ #include #include #include +#include + +#ifdef __x86_64__ +#define IS_X86 1 +#elif defined(__aarch64__) +#define IS_ARM 1 +#else +#error "Only the x86_64 and aarch64 architecture are supported." +#endif const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/"; @@ -33,5 +42,6 @@ std::vector SplitStringByDelimiter(const std::string& str, char del int RaiseNumFd(uint64_t numFd); bool ExistPath(const std::string& filePath); std::string GetTraceEventDir(); +bool StartWith(const std::string& str, const std::string& prefix); #endif // LIBKPROF_COMMON_H diff --git a/util/cpu_map.cpp b/util/cpu_map.cpp index c8e0c9ea394b02a3bfd12e5e5899d471573acd47..4c1577d4a137f11c4e50aa417fb014cf7b692ddf 100644 --- a/util/cpu_map.cpp +++ b/util/cpu_map.cpp @@ -99,10 +99,14 @@ bool InitCpuType() CHIP_TYPE GetCpuType() { +#ifdef IS_X86 + return HIPX86; +#else if (g_chipType == UNDEFINED_TYPE && !InitCpuType()) { return UNDEFINED_TYPE; } return g_chipType; +#endif } set GetOnLineCpuIds() diff --git a/util/cpu_map.h b/util/cpu_map.h index 6279954ab53b6490920f76d241855152d9fdf490..df163206bcbf2e19bef2f3b4681bf71c53ee16a3 100644 --- a/util/cpu_map.h +++ b/util/cpu_map.h @@ -27,6 +27,7 @@ enum CHIP_TYPE { HIPC = 3, HIPF = 4, HIPE = 5, + HIPX86 = 6, }; struct CpuTopology* GetCpuTopology(int coreId); diff --git a/util/pcerr.cpp b/util/pcerr.cpp index ab1c083ac52a3e57eeaa125469507151680165d4..3efc851a23999d170b0caf7f639d5b6c8281cdaa 100644 --- a/util/pcerr.cpp +++ b/util/pcerr.cpp @@ -14,6 +14,7 @@ ******************************************************************************/ #include #include +#include #include "pcerrc.h" #include "pcerr.h" @@ -53,6 +54,7 @@ namespace pcerr { {LIBPERF_ERR_BRANCH_JUST_SUPPORT_SAMPLING, "branch filter just support sampling mode"}, {LIBPERF_ERR_RESET_FD, "failed to reset fd output"}, {LIBPERF_ERR_SET_FD_RDONLY_NONBLOCK, 
"failed to set fd readonly and nonbolock"}, + {LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86, "the current interface does not support x86"}, }; static std::unordered_map warnMsgs = { {LIBPERF_WARN_CTXID_LOST, "Some SPE context packets are not found in the traces."}, @@ -63,6 +65,8 @@ namespace pcerr { static std::string warnMsg = ""; static int errCode = SUCCESS; static std::string errMsg = ""; + static std::mutex errMutex; + static std::mutex warnMutex; static std::string GetCustomMsg(int code) { std::string msg; @@ -91,6 +95,7 @@ namespace pcerr { void New(int code, const std::string& msg) { + std::lock_guard lock(errMutex); errCode = code; errMsg = msg; } @@ -107,6 +112,7 @@ namespace pcerr { void SetWarn(int code, const std::string& msg) { + std::lock_guard lock(warnMutex); warnCode = code; warnMsg = msg; }