From 47c735ede45a4a89d62d5eaa6af8361f9f9872f2 Mon Sep 17 00:00:00 2001 From: glx Date: Thu, 19 Jun 2025 20:00:12 +0800 Subject: [PATCH] =?UTF-8?q?Implement=20group=20read=20for=20group=20events?= =?UTF-8?q?=20=E5=AE=9E=E7=8E=B0group=20read=EF=BC=9A=20=E5=BD=93=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E4=BA=8B=E4=BB=B6=E5=88=86=E7=BB=84=E7=9A=84=E6=97=B6?= =?UTF-8?q?=E5=80=99=EF=BC=8C=E5=8F=AA=E6=9C=89group=20leader=E9=9C=80?= =?UTF-8?q?=E8=A6=81read=E8=AE=A1=E6=95=B0=EF=BC=8C=E5=85=B6=E4=BD=99?= =?UTF-8?q?=E7=9A=84group=20members=E6=97=A0=E9=9C=80read=E3=80=82=20?= =?UTF-8?q?=E8=BF=99=E6=A0=B7=E5=87=8F=E5=B0=91=E4=BA=86read=E7=9A=84?= =?UTF-8?q?=E6=AC=A1=E6=95=B0=EF=BC=8C=E6=8F=90=E5=8D=87=E4=BA=86=E6=80=A7?= =?UTF-8?q?=E8=83=BD=EF=BC=9B=E8=80=8C=E4=B8=94=E8=83=BD=E4=BF=9D=E8=AF=81?= =?UTF-8?q?group=E7=9A=84=E6=89=80=E6=9C=89=E4=BA=8B=E4=BB=B6=E9=83=BD?= =?UTF-8?q?=E6=98=AF=E5=9C=A8=E5=90=8C=E6=97=B6=E8=A2=AB=E8=AF=BB=E5=8F=96?= =?UTF-8?q?=EF=BC=8C=E6=89=80=E6=9C=89=E4=BA=8B=E4=BB=B6=E7=9A=84=E9=87=87?= =?UTF-8?q?=E9=9B=86=E6=97=B6=E9=95=BF=E8=83=BD=E5=AE=8C=E5=85=A8=E4=B8=80?= =?UTF-8?q?=E8=87=B4=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实现方法: - 在Open的时候,对于group events,read_format加上PERF_FORMAT_GROUP。 - 在Read的时候,对于group events,读取所有member的计数,并且把每个计数和事件名称对应起来。 --- pmu/evt_list.cpp | 21 ++++- pmu/evt_list.h | 5 + pmu/perf_counter.cpp | 124 +++++++++++++++++++++---- pmu/perf_counter.h | 25 ++++- pmu/pmu_list.cpp | 3 +- pmu/pmu_metric.cpp | 6 +- test/test_perf/test_metric.cpp | 28 ++---- test/test_perf/test_trace_analysis.cpp | 7 +- test/test_perf/test_trace_pointer.cpp | 3 - 9 files changed, 162 insertions(+), 60 deletions(-) diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index d12f1df..29bcf1d 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -91,7 +91,9 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrSetBranchSampleFilter(branchSampleFilter); int err = 0; if (groupEnable) { - err 
= perfEvt->Init(groupEnable, evtLeader->xyCounterArray[row][col]->GetFd(), resetOutPutFd); + // If evtLeader is nullptr, I am the leader. + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][col]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); } else { err = perfEvt->Init(groupEnable, -1, resetOutPutFd); } @@ -168,7 +170,14 @@ void KUNPENG_PMU::EvtList::FillFields( { for (auto i = start; i < end; ++i) { data[i].cpuTopo = cpuTopo; - data[i].evt = this->pmuEvt->name.c_str(); + if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { + // For group events, PmuData are all read by event leader, + // and then some PmuData elements should be related to group members. + data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + } else { + // For no group events or group leader. + data[i].evt = this->pmuEvt->name.c_str(); + } data[i].groupId = this->groupId; if (data[i].comm == nullptr) { data[i].comm = procTopo->comm; @@ -269,7 +278,8 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons int err = 0; if (groupEnable) { int sz = this->pidList.size(); - err = perfEvt->Init(groupEnable, evtLeader->xyCounterArray[row][sz - 1]->GetFd(), -1); + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, -1); } else { err = perfEvt->Init(groupEnable, -1, -1); } @@ -348,4 +358,9 @@ void KUNPENG_PMU::EvtList::ClearExitFd() procMap.erase(exitPid); numPid--; } +} + +void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +{ + this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); } \ No newline at end of file diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 6764d4d..d064143 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -49,6 +49,7 @@ enum class UncoreState { OnlyOther = 0b01, }; +struct EventGroupInfo; class EvtList { public: using ProcPtr = std::shared_ptr; @@ -72,6 +73,8 @@ public: int Read(std::vector& 
pmuData, std::vector& sampleIps, std::vector& extPool, std::vector& switchData); + void SetGroupInfo(const EventGroupInfo &grpInfo); + void SetTimeStamp(const int64_t& timestamp) { this->ts = timestamp; @@ -134,6 +137,8 @@ private: int prevStat; int evtStat; std::mutex mutex; + // Fixme: decouple group event with normal event, use different classes to implement Read and Init. + std::unique_ptr groupInfo = nullptr; }; struct EventGroupInfo { diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index 7a212fe..9b2db6d 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -30,8 +30,20 @@ #include "perf_counter.h" using namespace std; +using namespace pcerr; static constexpr int MAX_ATTR_SIZE = 120; + +struct GroupReadFormat { + __u64 nr; + __u64 timeEnabled; + __u64 timeRunning; + struct { + __u64 value; + __u64 id; + } values[]; +}; + /** * Read pmu counter and deal with pmu multiplexing * Right now we do not implement grouping logic, thus we ignore the @@ -40,18 +52,90 @@ static constexpr int MAX_ATTR_SIZE = 120; int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &swtichData) { - struct ReadFormat perfCountValue; - - /** - * If some how the file descriptor is less than 0, - * we make the count to be 0 and return - */ if (__glibc_unlikely(this->fd < 0)) { - this->count = 0; + this->accumCount.clear(); + return UNKNOWN_ERROR; + } + + if (groupStatus == GroupStatus::NO_GROUP) { + return ReadSingleEvent(data); + } else if (groupStatus == GroupStatus::GROUP_LEADER) { + return ReadGroupEvents(data); + } + + // Group members do not need to read counters, + // Group leader will read them all. 
+ return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +{ + ReadFormat perfCountValue; + int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); + if (len < 0) { + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + if (accumCount.empty()) { + accumCount.assign(1, 0); + } + + int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, + perfCountValue.timeRunning, accumCount[0], data); + if (err != SUCCESS) { + return err; + } + + this->enabled = perfCountValue.timeEnabled; + this->running = perfCountValue.timeRunning; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +{ + // Fixme: + // In current class, we do not know how many events in group. + // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id) + static const unsigned MAX_GROUP_EVENTS = 14; + unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS; + GroupReadFormat *perfCountValue = static_cast(malloc(readSize)); + if (perfCountValue == NULL) { + return COMMON_ERR_NOMEM; + } + int len = read(this->fd, perfCountValue, readSize); + if (len < 0) { + free(perfCountValue); + New(UNKNOWN_ERROR, strerror(errno)); return UNKNOWN_ERROR; } - read(this->fd, &perfCountValue, sizeof(perfCountValue)); - if (perfCountValue.value < count || perfCountValue.timeEnabled < enabled || perfCountValue.timeRunning < running) { + + if (accumCount.empty()) { + accumCount.assign(perfCountValue->nr, 0); + } + + for (int i = 0;i < accumCount.size(); ++i) { + auto err = CountValueToData(perfCountValue->values[i].value, + perfCountValue->timeEnabled, + perfCountValue->timeRunning, + accumCount[i], + data + ); + if (err != SUCCESS) { + free(perfCountValue); + return err; + } + } + + this->enabled = perfCountValue->timeEnabled; + this->running = perfCountValue->timeRunning; + free(perfCountValue); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::CountValueToData(const 
__u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, vector &data) +{ + if (value < accumCount || timeEnabled < enabled || timeRunning < running) { return LIBPERF_ERR_COUNT_OVERFLOW; } @@ -60,17 +144,14 @@ int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector(perfCountValue.timeEnabled - enabled) / static_cast(perfCountValue.timeRunning - running); - increCount = static_cast((perfCountValue.value - count)* percent); + percent = static_cast(timeEnabled - enabled) / static_cast(timeRunning - running); + increCount = static_cast((value - accumCount)* percent); } - - this->count = perfCountValue.value; - this->enabled = perfCountValue.timeEnabled; - this->running = perfCountValue.timeRunning; + accumCount = value; data.emplace_back(PmuData{0}); auto& current = data.back(); @@ -123,7 +204,13 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0, * the child events will not start counting until the group leader is enabled. 
*/ - attr.disabled = 0; + if (groupFd != -1) { + attr.disabled = 0; + groupStatus = GroupStatus::GROUP_MEMBER; + } else { + groupStatus = GroupStatus::GROUP_LEADER; + } + attr.read_format |= PERF_FORMAT_GROUP; this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } else { #ifdef IS_X86 @@ -136,6 +223,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou } else { this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } + groupStatus = GroupStatus::NO_GROUP; } this->groupFd = groupFd; DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", @@ -160,7 +248,7 @@ int KUNPENG_PMU::PerfCounter::Enable() if (err != SUCCESS) { return err; } - this->count = 0; + this->accumCount.clear(); this->enabled = 0; this->running = 0; return SUCCESS; diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index 585bf58..8937bdb 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -44,11 +44,26 @@ namespace KUNPENG_PMU { int Reset() override; private: - // Accumulated pmu count, time enabled and time running. - __u64 count = 0; - __u64 enabled = 0; - __u64 running = 0; - int groupFd = 0; + enum class GroupStatus + { + NO_GROUP, + GROUP_LEADER, + GROUP_MEMBER + }; + + int CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, std::vector &data); + int ReadSingleEvent(std::vector &data); + int ReadGroupEvents(std::vector &data); + + // Accumulated pmu count, time enabled and time running. + __u64 enabled = 0; + __u64 running = 0; + // For group events, is the accum counts of all members. + // For normal events, has only one element. 
+ std::vector<__u64> accumCount; + int groupFd = 0; + GroupStatus groupStatus = GroupStatus::NO_GROUP; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 96ea5df..9e8feb0 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -199,7 +199,7 @@ namespace KUNPENG_PMU { continue; } if (eventGroupInfoMap.find(evtList->GetGroupId()) == eventGroupInfoMap.end()) { - auto err = EvtInit(false, nullptr, pd, evtList, isMemoryEnough); + auto err = EvtInit(true, nullptr, pd, evtList, isMemoryEnough); if (err != SUCCESS) { return err; } @@ -233,6 +233,7 @@ namespace KUNPENG_PMU { return err; } } + evtGroup.second.evtLeader->SetGroupInfo(evtGroup.second); } groupMapPtr eventDataEvtGroup = std::make_shared>(eventGroupInfoMap); InsertDataEvtGroupList(pd, eventDataEvtGroup); diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 1fe04d7..56cf758 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1722,21 +1722,21 @@ int PmuGetNumaCore(unsigned nodeId, unsigned **coreList) ifstream in(nodeListFile); if (!in.is_open()) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } std::string cpulist; in >> cpulist; auto split = SplitStringByDelimiter(cpulist, '-'); if (split.size() != 2) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } auto start = stoi(split[0]); auto end = stoi(split[1]); int coreNums = end - start + 1; if (coreNums <= 0) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } InitializeCoreArray(); *coreList = &coreArray[start]; diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index 68710cb..d6b5e10 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -37,7 +37,7 @@ TEST_F(TestMetric, GetInvalidBdfList) enum PmuBdfType bdfType = (enum PmuBdfType)5; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << 
Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_EQ(bdfList, nullptr); } @@ -46,7 +46,7 @@ TEST_F(TestMetric, GetPcieBdfList) enum PmuBdfType bdfType = PMU_BDF_TYPE_PCIE; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(bdfList, nullptr); } @@ -55,7 +55,7 @@ TEST_F(TestMetric, GetSmmuBdfList) enum PmuBdfType bdfType = PMU_BDF_TYPE_SMMU; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(bdfList, nullptr); } @@ -63,7 +63,7 @@ TEST_F(TestMetric, GetCpuFreq) { unsigned core = 6; int64_t cpu6Freq = PmuGetCpuFreq(core); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(cpu6Freq, -1); } @@ -72,12 +72,8 @@ TEST_F(TestMetric, GetClusterIdListSuccess) unsigned clusterId = 3; unsigned* coreList = nullptr; int len = PmuGetClusterCore(clusterId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(len, -1); - for (int i = 0; i < len; ++i) { - cout << coreList[i] << " "; - } - cout << endl; } TEST_F(TestMetric, GetClusterIdListOverSize) @@ -85,7 +81,7 @@ TEST_F(TestMetric, GetClusterIdListOverSize) unsigned clusterId = 33; unsigned* coreList = nullptr; int len = PmuGetClusterCore(clusterId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_EQ(len, -1); } @@ -94,12 +90,8 @@ TEST_F(TestMetric, GetNumaIdList) unsigned numaId = 2; unsigned* coreList = nullptr; int len = PmuGetNumaCore(numaId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(len, -1); - for (int i = 0; i < len; ++i) { - cout << coreList[i] << " "; - } - cout << endl; } TEST_F(TestMetric, CollectDDRBandwidth) @@ -108,7 +100,6 @@ TEST_F(TestMetric, CollectDDRBandwidth) devAttr[0].metric = PMU_DDR_READ_BW; devAttr[1].metric = PMU_DDR_WRITE_BW; int pd = PmuDeviceOpen(devAttr, 2); - cout << 
Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -133,7 +124,6 @@ TEST_F(TestMetric, CollectL3Latency) PmuDeviceAttr devAttr = {}; devAttr.metric = PMU_L3_LAT; int pd = PmuDeviceOpen(&devAttr, 1); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -212,7 +202,6 @@ TEST_F(TestMetric, CollectL3LatencyAndL3Miss) devAttr[1].metric = PMU_L3_MISS; int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -248,7 +237,6 @@ TEST_F(TestMetric, GetMetricPcieBandwidth) } int pd = PmuDeviceOpen(devAttr, bdfLen); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -276,7 +264,6 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) const char** bdfList = nullptr; unsigned bdfLen = 0; bdfList = PmuDeviceBdfList(PMU_BDF_TYPE_SMMU, &bdfLen); - cout << Perror() << endl; ASSERT_NE(bdfList, nullptr); PmuDeviceAttr devAttr[bdfLen] = {}; for (int i = 0; i < bdfLen; ++i) { @@ -285,7 +272,6 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) } int pd = PmuDeviceOpen(devAttr, bdfLen); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); diff --git a/test/test_perf/test_trace_analysis.cpp b/test/test_perf/test_trace_analysis.cpp index 7062d69..5abec52 100644 --- a/test/test_perf/test_trace_analysis.cpp +++ b/test/test_perf/test_trace_analysis.cpp @@ -93,11 +93,6 @@ TEST_F(TestAnaylzeData, collect_single_trace_data_success) { EnableTracePointer(pd, 1); int len = PmuTraceRead(pd, &data); EXPECT_TRUE(data != nullptr); - for (int i = 0; i < len; i++) { - cout << "funcName: " << data[i].funcs << " startTs: " << data[i].startTs << " elapsedTime: " << data[i].elapsedTime - << " pid: " << data[i].pid << " tid: " << data[i].tid << " cpu: " << data[i].cpu - << " comm: " << data[i].comm << endl; - } } /** @@ -118,7 +113,7 @@ TEST_F(TestAnaylzeData, collect_sleep_trace_data_success) { ASSERT_NE(pd, -1); EnableTracePointer(pd, 1); int len = PmuTraceRead(pd, &data); - 
EXPECT_TRUE(data != nullptr); + ASSERT_TRUE(data != nullptr); ASSERT_LT(data[0].elapsedTime, 0.1); } diff --git a/test/test_perf/test_trace_pointer.cpp b/test/test_perf/test_trace_pointer.cpp index 6bdb60d..430f394 100644 --- a/test/test_perf/test_trace_pointer.cpp +++ b/test/test_perf/test_trace_pointer.cpp @@ -142,8 +142,6 @@ TEST_F(TestTraceRaw, trace_pointer_net_napi) { bool l4_hash; rt = PmuGetField(rawData, "l4_hash", &l4_hash, sizeof(l4_hash)); ASSERT_EQ(rt, SUCCESS); - printf("name=%s napi_id=%d queue_mapping=%hd ip_summed=%02X l4_hash=%d ", name, napi_id, queue_mapping, - ip_summed, l4_hash); } } @@ -163,7 +161,6 @@ TEST_F(TestTraceRaw, trace_pointer_skb_copy_datagram_iovec) { unsigned int len; rt = PmuGetField(rawData, "len", &len, sizeof(len)); ASSERT_EQ(rt, SUCCESS); - printf("skbaddr=%p len=%d", skbaddr, len); } } -- Gitee