From 3fc86f4687d0d543d564cfd418ec631765cfbc70 Mon Sep 17 00:00:00 2001 From: glx Date: Tue, 17 Jun 2025 19:54:42 +0800 Subject: [PATCH] Add groupId in PmuData MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PmuData增加groupId字段,groupId与EvtAttr.groupId对应,表示PmuData属于哪个事件分组; 修正perf_counter的enable、disable和reset,只有group leader才能调用这些接口,leader调用后所有分组成员也会同时起作用。如果每个成员事件都调用这些接口,那么每个成员事件采集的时长会不一致。 --- docs/Details_Usage.md | 89 ++++++++++++++++++++++----------- go/src/libkperf/kperf/kperf.go | 2 + include/pmu.h | 3 +- pmu/evt_list.cpp | 1 + pmu/evt_list.h | 8 +-- pmu/perf_counter.cpp | 22 ++++++++ pmu/perf_counter.h | 3 ++ pmu/pmu.cpp | 14 +++--- pmu/pmu_list.cpp | 2 +- pmu/pmu_list.h | 2 +- python/modules/_libkperf/Pmu.py | 38 +++++++++----- 11 files changed, 128 insertions(+), 56 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 68c5f2c..4cb31e3 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -657,56 +657,60 @@ libkperf提供了事件分组的能力,能够让多个事件同时处于采集 perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired ``` -对于libkperf,可以通过设置PmuAttr的evtAttr字段来设定哪些事件放在一个group内。 -比如,可以这样调用: +如果对多个相关联的事件采集,可以把关联的事件放到一个事件组。比如,计算bad speculation需要用到事件inst_retired,inst_spec和cycles,计算retiring需要用到事件inst_retired和cycles。那么perf应该这样使用: +``` +perf stat -e "{inst_retired,inst_spec,cycles}","{inst_retired,cycles}" +``` +用libkperf可以这样实现: ```c++ -// c++代码示例 -#include -#include "symbol.h" -#include "pmu.h" -#include "pcerrc.h" - -unsigned numEvt = 5; -char *evtList[numEvt] = {"cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"}; -// 前四个事件是一个分组 -struct EvtAttr groupId[numEvt] = {1,1,1,1,-1}; +// 指定5个事件,因为inst_retired和cycles会重复出现在多个指标中,所以需要重复指定事件。 +char *evtList[5] = {"inst_retired", "inst_spec", "cycles", "inst_retired", "cycles"}; +// 指定事件分组编号,前三个事件为一组,后两个事件为一组。 +EvtAttr groupId[5] = {1,1,1,2,2}; PmuAttr attr = {0}; attr.evtList = evtList; -attr.numEvt = numEvt; +attr.numEvt = 5; 
attr.evtAttr = groupId; - int pd = PmuOpen(COUNTING, &attr); -if ( pd == -1) { - printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); -} PmuEnable(pd); sleep(1); PmuDisable(pd); -PmuData* data = nullptr; +PmuData *data = nullptr; int len = PmuRead(pd, &data); -for (int i = 0; i < len; i++) { - printf("evt=%s, count=%d evt=%d\n", data[i].evt, data[i].count, data[i].evt); +// 根据分组来聚合数据 +map> evtMap; +for (int i=0;ipmuEvt->name.c_str(); + data[i].groupId = this->groupId; if (data[i].comm == nullptr) { data[i].comm = procTopo->comm; } diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 109dba5..6764d4d 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -54,8 +54,8 @@ public: using ProcPtr = std::shared_ptr; using CpuPtr = std::shared_ptr; EvtList(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, - std::shared_ptr pmuEvt, const int group_id) - : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), group_id(group_id) + std::shared_ptr pmuEvt, const int groupId) + : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), groupId(groupId) { this->numCpu = this->cpuList.size(); this->numPid = this->pidList.size(); @@ -99,7 +99,7 @@ public: int GetGroupId() const { - return group_id; + return groupId; } int GetBlockedSample() const @@ -121,7 +121,7 @@ private: std::vector pidList; std::vector unUsedPidList; std::shared_ptr pmuEvt; - int group_id; // event group id + int groupId; // event group id std::vector>> xyCounterArray; std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); unsigned int numCpu = 0; diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index bf801b0..7a212fe 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -137,6 +137,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } } + this->groupFd = groupFd; DBG_PRINT("type: %d cpu: %d config: 
%llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { @@ -150,6 +151,11 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou */ int KUNPENG_PMU::PerfCounter::Enable() { + if (groupFd != -1) { + // Only group leader should use ioctl to enable, disable or reset, + // otherwise each event in the group will be collected for different durations. + return SUCCESS; + } int err = PerfEvt::Enable(); if (err != SUCCESS) { return err; @@ -159,3 +165,19 @@ int KUNPENG_PMU::PerfCounter::Enable() this->running = 0; return SUCCESS; } + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index 31280c7..585bf58 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -40,12 +40,15 @@ namespace KUNPENG_PMU { std::vector &extPool, std::vector &swtichData) override; int MapPerfAttr(const bool groupEnable, const int groupFd) override; int Enable() override; + int Disable() override; + int Reset() override; private: // Accumulated pmu count, time enabled and time running. __u64 count = 0; __u64 enabled = 0; __u64 running = 0; + int groupFd = -1; // group fd passed to MapPerfAttr; -1 (default) means not a member of another leader's group }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 4ffd5d7..30de9ef 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -270,12 +270,12 @@ static void CopyAttrData(PmuAttr* newAttr, PmuAttr* inputAttr, enum PmuTaskType newAttr->evtList = newEvtList; newAttr->numEvt = inputAttr->numEvt; - // If the event group ID is not enabled, set the group_id to -1. It indicates that the event is not grouped. + // If the event group ID is not enabled, set the groupId to -1. It indicates that the event is not grouped. 
if ((collectType == SAMPLING || collectType == COUNTING) && inputAttr->evtAttr == nullptr) { struct EvtAttr *evtAttr = new struct EvtAttr[newAttr->numEvt]; // handle event group id. -1 means that it doesn't run event group feature. for (int i = 0; i < newAttr->numEvt; ++i) { - evtAttr[i].group_id = -1; + evtAttr[i].groupId = -1; } newAttr->evtAttr = evtAttr; } @@ -289,13 +289,13 @@ static bool FreeEvtAttr(struct PmuAttr *attr) bool flag = false; int notGroupId = -1; for (int i = 0; i < attr->numEvt; ++i) { - if (attr->evtAttr[i].group_id != notGroupId ) { + if (attr->evtAttr[i].groupId != notGroupId ) { flag = true; break; } } - // when the values of group_id are all -1, the applied memory is released. + // when the values of groupId are all -1, the applied memory is released. if (!flag) { delete[] attr->evtAttr; attr->evtAttr = nullptr; @@ -831,7 +831,7 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } -static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) +static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int groupId) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); /** @@ -868,7 +868,7 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att */ PrepareCpuList(attr, taskParam.get(), pmuEvt); - taskParam->group_id = group_id; + taskParam->groupId = groupId; taskParam->pmuEvt = shared_ptr(pmuEvt, PmuEvtFree); taskParam->pmuEvt->useFreq = attr->useFreq; @@ -890,7 +890,7 @@ struct PmuTaskAttr* AssignPmuTaskParam(enum PmuTaskType collectType, struct PmuA return taskParam; } for (int i = 0; i < attr->numEvt; i++) { - struct PmuTaskAttr* current = AssignTaskParam(collectType, attr, attr->evtList[i], attr->evtAttr[i].group_id); + struct PmuTaskAttr* current = AssignTaskParam(collectType, attr, attr->evtList[i], attr->evtAttr[i].groupId); if (current == nullptr) { return 
nullptr; } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 8ea83ac..96ea5df 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -150,7 +150,7 @@ namespace KUNPENG_PMU { } fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); std::shared_ptr evtList = - std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->group_id); + std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); needBytesNum += PredictRequiredMemory(taskParam->pmuEvt->collectType, cpuTopoList.size(), procTopoList.size()); evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); InsertEvtList(pd, evtList); diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 523e2ad..99539d2 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -34,7 +34,7 @@ struct PmuTaskAttr { // list length has to be as the same as numPid std::shared_ptr pmuEvt; // which pmu to be collected - int group_id; // event group id + int groupId; // event group id struct PmuTaskAttr* next; // next task attribute }; diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 2cffe19..2255a16 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -28,32 +28,32 @@ class SampleRateUnion(ctypes.Union): class CtypesEvtAttr(ctypes.Structure): """ struct EvtAttr { - int group_id; + int groupId; }; """ - _fields_ = [('group_id', ctypes.c_int)] + _fields_ = [('groupId', ctypes.c_int)] - def __init__(self, group_id: int=0, *args: Any, **kw: Any) -> None: + def __init__(self, groupId: int=0, *args: Any, **kw: Any) -> None: super().__init__(*args, **kw) - self.group_id = ctypes.c_int(group_id) + self.groupId = ctypes.c_int(groupId) class EvtAttr: __slots__ = ['__c_evt_attr'] - def __init__(self, group_id: int=0) -> None: - self.__c_evt_attr = CtypesEvtAttr(group_id) + def __init__(self, groupId: int=0) -> None: + self.__c_evt_attr = 
CtypesEvtAttr(groupId) @property def c_evt_attr(self) -> CtypesEvtAttr: return self.__c_evt_attr @property - def group_id(self) -> int: - return int(self.c_evt_attr.group_id) + def groupId(self) -> int: + return int(self.c_evt_attr.groupId) - @group_id.setter - def group_id(self, group_id: int) -> None: - self.c_evt_attr.group_id = ctypes.c_int(group_id) + @groupId.setter + def groupId(self, groupId: int) -> None: + self.c_evt_attr.groupId = ctypes.c_int(groupId) @classmethod def from_c_evt_attr(cls, c_evt_attr: CtypesEvtAttr) -> 'EvtAttr': @@ -1115,7 +1115,8 @@ class CtypesPmuData(ctypes.Structure): int64_t ts; // time stamp. unit: ns pid_t pid; // process id int tid; // thread id - unsigned cpu; // cpu id + int cpu; // cpu id + int groupId; // id for group event struct CpuTopology *cpuTopo; // cpu topology const char *comm; // process command uint64_t period; // number of Samples @@ -1132,6 +1133,7 @@ class CtypesPmuData(ctypes.Structure): ('pid', ctypes.c_int), ('tid', ctypes.c_int), ('cpu', ctypes.c_int), + ('groupId', ctypes.c_int), ('cpuTopo', ctypes.POINTER(CtypesCpuTopology)), ('comm', ctypes.c_char_p), ('period', ctypes.c_uint64), @@ -1148,6 +1150,7 @@ class CtypesPmuData(ctypes.Structure): pid: int=0, tid: int=0, cpu: int=0, + groupId: int=0, cpuTopo: CtypesCpuTopology=None, comm: str='', period: int=0, @@ -1164,6 +1167,7 @@ class CtypesPmuData(ctypes.Structure): self.pid = ctypes.c_int(pid) self.tid = ctypes.c_int(tid) self.cpu = ctypes.c_int(cpu) + self.groupId = ctypes.c_int(groupId) self.cpuTopo = cpuTopo self.comm = ctypes.c_char_p(comm.encode(UTF_8)) self.period = ctypes.c_uint64(period) @@ -1183,6 +1187,7 @@ class ImplPmuData: pid: int=0, tid: int=0, cpu: int=0, + groupId: int=0, cpuTopo: CpuTopology=None, comm: str='', period: int=0, @@ -1197,6 +1202,7 @@ class ImplPmuData: pid=pid, tid=tid, cpu=cpu, + groupId=groupId, cpuTopo=cpuTopo.c_cpu_topo if cpuTopo else None, comm=comm, period=period, @@ -1258,6 +1264,14 @@ class ImplPmuData: def cpu(self, 
cpu: int) -> None: self.c_pmu_data.cpu = ctypes.c_int(cpu) + @property + def groupId(self) -> int: + return self.c_pmu_data.groupId + + @groupId.setter + def groupId(self, groupId: int) -> None: + self.c_pmu_data.groupId = ctypes.c_int(groupId) + @property def cpuTopo(self) -> CpuTopology: return CpuTopology.from_c_cpu_topo(self.c_pmu_data.cpuTopo.contents) if self.c_pmu_data.cpuTopo else None -- Gitee