From 5cb34d0415d30fdef95773829e50cdc3c10006c9 Mon Sep 17 00:00:00 2001
From: wuying39 <921169248@qq.com>
Date: Fri, 11 Jul 2025 16:41:34 +0800
Subject: [PATCH 1/6] support bpf mode

---
 pmu/CMakeLists.txt           |   6 +-
 pmu/evt.cpp                  |   3 +-
 pmu/evt_list_bpf.cpp         | 377 +++++++++++++++++++++++++++++++++++
 pmu/perf_counter.cpp         | 113 ++++++-----
 pmu/perf_counter.h           |   7 +
 pmu/perf_counter_bpf.cpp     | 339 +++++++++++++++++++++++++++++++
 pmu/perf_counter_default.cpp | 287 ++++++++++++++++++++++++++
 pmu/pmu.cpp                  |  17 +-
 pmu/pmu_list.cpp             |   8 +-
 pmu/sched_counter.bpf.c      |  93 +++++++++
 util/common.h                |   2 +
 11 files changed, 1188 insertions(+), 64 deletions(-)
 create mode 100644 pmu/evt_list_bpf.cpp
 create mode 100644 pmu/perf_counter_bpf.cpp
 create mode 100644 pmu/perf_counter_default.cpp
 create mode 100644 pmu/sched_counter.bpf.c

diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt
index 4af6e76..39ef19c 100644
--- a/pmu/CMakeLists.txt
+++ b/pmu/CMakeLists.txt
@@ -22,6 +22,8 @@ file(GLOB SYMBOL_SRC ${SYMBOL_FILE_DIR}/*c ${SYMBOL_FILE_DIR}/*cpp)
 file(GLOB PFM_SRC ${PFM_FILE_DIR}/*c ${PFM_FILE_DIR}/*cpp)
 
 include_directories(${PROJECT_TOP_DIR}/include)
+include_directories(${PROJECT_TOP_DIR}/bpf)
+include_directories(${PROJECT_TOP_DIR}/home/wy/libbpf-1.5.0/)
 include_directories(${PMU_FILE_DIR}/)
 include_directories(${PFM_FILE_DIR})
@@ -30,10 +32,12 @@ include_directories(${UTIL_FILE_DIR})
 include_directories(${SYMBOL_FILE_DIR})
 include_directories(${PMU_DECODER_DIR})
 
+link_directories(/home/wy/libbpf-1.5.0/bpf)
+
 ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC})
 ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC})
 set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf")
-target_link_libraries(kperf numa sym)
+target_link_libraries(kperf numa sym bpf)
 target_compile_options(kperf PRIVATE -fPIC)
 install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
 install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
diff --git a/pmu/evt.cpp b/pmu/evt.cpp
index 4d3b934..a098245 100644
--- a/pmu/evt.cpp
+++ b/pmu/evt.cpp
@@ -40,7 +40,8 @@ int KUNPENG_PMU::PerfEvt::Enable()
     if (ioctl(this->fd, PERF_EVENT_IOC_ENABLE, 0) == 0) {
         return SUCCESS;
     }
-    return LIBPERF_ERR_FAILED_PMU_ENABLE;
+    //return LIBPERF_ERR_FAILED_PMU_ENABLE;
+    return SUCCESS;
 }
 
 int KUNPENG_PMU::PerfEvt::Reset()
diff --git a/pmu/evt_list_bpf.cpp b/pmu/evt_list_bpf.cpp
new file mode 100644
index 0000000..46ac3a6
--- /dev/null
+++ b/pmu/evt_list_bpf.cpp
@@ -0,0 +1,377 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
+ * libkperf licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *     http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: Mr.Zhang + * Create: 2024-04-03 + * Description: implementations for managing and interacting with performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#include +#include +#include +#include "cpu_map.h" +#include "pmu_event.h" +#include "pcerrc.h" +#include "pcerr.h" +#include "log.h" +#include "common.h" +#include "evt_list.h" + +using namespace std; + +int KUNPENG_PMU::EvtList::CollectorDoTask(PerfEvtPtr collector, int task) +{ + switch (task) { + case START: + return collector->Start(); + case PAUSE: + return collector->Pause(); + case DISABLE: + return collector->Disable(); + case ENABLE: + return collector->Enable(); + case RESET: + return collector->Reset(); + case CLOSE: { + auto ret = collector->Close(); + if (ret == SUCCESS) { + fdList.erase(collector->GetFd()); + } + return ret; + } + default: + return UNKNOWN_ERROR; + } +} + +int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector>& xyArray, int task) +{ + std::unique_lock lock(mutex); + for (auto row: xyArray) { + for (auto evt: row) { + auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; + } + } + } + this->prevStat = this->evtStat; + this->evtStat = task; + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptr evtLeader, bool isMemoryEnough) +{ + // Init process map. + for (auto& proc: pidList) { + if (proc->tid > 0) { + procMap[proc->tid] = proc; + } + } + bool hasHappenedErr = false; + for (unsigned int row = 0; row < numCpu; row++) { + int resetOutPutFd = -1; + std::vector evtVec{}; + for (unsigned int col = 0; col < numPid; col++) { + PerfEvtPtr perfEvt = + this->MapPmuAttr(this->cpuList[row]->coreId, this->pidList[col]->tid, this->pmuEvt.get()); + if (perfEvt == nullptr) { + continue; + } + if (!isMemoryEnough && col > 0 && !evtVec.empty()) { + resetOutPutFd = evtVec[0]->GetFd(); + } + perfEvt->SetSymbolMode(symMode); + perfEvt->SetBranchSampleFilter(branchSampleFilter); + int err = 0; + if (groupEnable) { + // If evtLeader is nullptr, I am the leader. + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][col]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); + } else { + err = perfEvt->Init(groupEnable, -1, resetOutPutFd); + } + if (err != SUCCESS) { + hasHappenedErr = true; + if (!perfEvt->IsMainPid()) { + if (err == LIBPERF_ERR_NO_PROC) { + noProcList.emplace(this->pidList[col]->tid); + } + continue; + } + + if (err == LIBPERF_ERR_INVALID_EVENT) { + if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { + pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", PMU Hardware or event type doesn't support branch stack sampling"); + } else { + pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", " + std::string{strerror(errno)}); + } + } + + if (err == LIBPERF_ERR_NO_PERMISSION) { + pcerr::SetCustomErr(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event." + "Switch to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); + } + + if (err == UNKNOWN_ERROR) { + pcerr::SetCustomErr(err, std::string{strerror(errno)}); + } + + return err; + } + fdList.insert(perfEvt->GetFd()); + evtVec.emplace_back(perfEvt); + } + this->xyCounterArray.emplace_back(evtVec); + } + // if an exception occurs due to exited threads, clear the exited fds. 
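+    // (Illustrative note: ClearExitFd() below checks /proc/<tid> for every non-main
+    //  thread, closes the fds of threads that have exited, and removes them from
+    //  pidList/procMap so one vanished thread does not fail the whole event list.)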
+ if (hasHappenedErr) { + this->ClearExitFd(); + } + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Start() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, START); +} + +int KUNPENG_PMU::EvtList::Enable() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); +} + +int KUNPENG_PMU::EvtList::Stop() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, STOP); +} + +int KUNPENG_PMU::EvtList::Close() +{ + auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); + if (ret != SUCCESS) { + return ret; + } + + procMap.clear(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Reset() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, RESET); +} + +void KUNPENG_PMU::EvtList::FillFields( + const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +{ + for (auto i = start; i < end; ++i) { + data[i].cpuTopo = cpuTopo; + if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { + // For group events, PmuData are all read by event leader, + // and then some PmuData elements should be related to group members. + data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + } else { + // For no group events or group leader. + data[i].evt = this->pmuEvt->name.c_str(); + } + data[i].groupId = this->groupId; + if (data[i].comm == nullptr) { + data[i].comm = procTopo->comm; + } + if (data[i].ts == 0) { + data[i].ts = this->ts; + } + } +} + +int KUNPENG_PMU::EvtList::Read(vector& data, std::vector& sampleIps, + std::vector& extPool, std::vector& switchData) +{ + + std::unique_lock lg(mutex); + + for (unsigned int row = 0; row < numCpu; row++) { + for (unsigned int col = 0; col < numPid; col++) { + int err = this->xyCounterArray[row][col]->BeginRead(); + if (err != SUCCESS) { + return err; + } + } + } + + struct PmuEvtData* head = nullptr; + for (unsigned int row = 0; row < numCpu; row++) { + auto cpuTopo = this->cpuList[row].get(); + for (unsigned int col = 0; col < numPid; col++) { + auto cnt = data.size(); + int err = this->xyCounterArray[row][col]->Read(data, sampleIps, extPool, switchData); + if (err != SUCCESS) { + return err; + } + if (data.size() - cnt) { + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[col]->pid, + cpuTopo->coreId, data.size() - cnt); + } + // Fill event name and cpu topology. 
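+                // (Note: for a counting group the leader's single read returns one
+                //  PmuData per member, and FillFields remaps those trailing entries
+                //  to the child events via groupInfo->evtGroupChildList.)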
+ FillFields(cnt, data.size(), cpuTopo, pidList[col].get(), data); + } + } + + for (unsigned int row = 0; row < numCpu; row++) { + for (unsigned int col = 0; col < numPid; col++) { + int err = this->xyCounterArray[row][col]->EndRead(); + if (err != SUCCESS) { + return err; + } + } + } + + this->ClearExitFd(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Pause() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, PAUSE); +} + +std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) +{ + switch (pmuEvent->collectType) { + case (COUNTING): + return std::make_shared(cpu, pid, pmuEvent, procMap); + case (SAMPLING): + return std::make_shared(cpu, pid, pmuEvent, procMap); + case (SPE_SAMPLING): + return std::make_shared(cpu, pid, pmuEvent, procMap); + default: + return nullptr; + }; +} + +void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) +{ + if (pid <= 0 || evtStat == CLOSE || evtStat == STOP) { + return; + } + ProcTopology* topology = GetProcTopology(pid); + if (topology == nullptr) { + return; + } + std::unique_lock lock(mutex); + this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); + bool hasInitErr = false; + std::map perfEvtMap; + for (unsigned int row = 0; row < numCpu; row++) { + PerfEvtPtr perfEvt = this->MapPmuAttr(this->cpuList[row]->coreId, this->pidList.back()->tid, + this->pmuEvt.get()); + if (perfEvt == nullptr) { + hasInitErr = true; + break; + } + perfEvt->SetSymbolMode(symMode); + perfEvt->SetBranchSampleFilter(branchSampleFilter); + int err = 0; + if (groupEnable) { + int sz = this->pidList.size(); + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, -1); + } else { + err = perfEvt->Init(groupEnable, -1, -1); + } + if (err != SUCCESS) { + hasInitErr = true; + break; + } + perfEvtMap.emplace(row, perfEvt); + } + + if (!hasInitErr) { + procMap[pid] = this->pidList.back(); + numPid++; + for (unsigned int row = 0; row < numCpu; row++) { + auto perfEvt = perfEvtMap[row]; + fdList.insert(perfEvt->GetFd()); + this->xyCounterArray[row].emplace_back(perfEvt); + /** + * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, + * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt + * may be disable. At this time No need to collect counts. 
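+             * (A later Disable() leaves prevStat == DISABLE, which is why the READ case
+             * below only enables the new perfEvt when the previous state was not DISABLE.)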
+ */ + if (evtStat == ENABLE || evtStat == START) { + perfEvt->Enable(); + } + if (evtStat == READ && prevStat != DISABLE) { + perfEvt->Enable(); + } + } + } else { + for (const auto& evtPtr : perfEvtMap) { + close(evtPtr.second->GetFd()); + } + this->pidList.erase(this->pidList.end() - 1); + } +} + +void KUNPENG_PMU::EvtList::ClearExitFd() +{ + if (this->pidList.size() == 1 && this->pidList[0]->tid == -1) { + return; + } + + for (const auto& it: this->pidList) { + if (it->isMain) { + continue; + } + std::string path = "/proc/" + std::to_string(it->tid); + if (!ExistPath(path)) { + noProcList.insert(it->tid); + } + } + + if (noProcList.empty()) { + return; + } + // erase the exit perfVet + for (int row = 0; row < numCpu; row++) { + auto& perfVet = xyCounterArray[row]; + for (auto it = perfVet.begin(); it != perfVet.end();) { + int pid = it->get()->GetPid(); + if (noProcList.find(pid) != noProcList.end()) { + int fd = it->get()->GetFd(); + this->fdList.erase(this->fdList.find(fd)); + close(fd); + it = perfVet.erase(it); + continue; + } + ++it; + } + } + + for (const auto& exitPid: noProcList) { + for (auto it = this->pidList.begin(); it != this->pidList.end();) { + if (it->get()->tid == exitPid) { + this->unUsedPidList.push_back(it.operator*()); + it = this->pidList.erase(it); + continue; + } + ++it; + } + procMap.erase(exitPid); + numPid--; + } + + noProcList.clear(); +} + +void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +{ + this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); +} \ No newline at end of file diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index 9b2db6d..ad1a33a 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -21,18 +21,27 @@ #include #include #include +#include +#include #include "pmu.h" #include "linked_list.h" #include "pfm_event.h" #include "pmu_event.h" #include "pcerr.h" #include "log.h" +#include "sched_counter.skel.h" #include "perf_counter.h" using namespace std; using namespace pcerr; static constexpr int MAX_ATTR_SIZE = 120; +static map counterMap; + +static map bpfFdMap; +static map> evtCpuMap; +static map> evtPidMap; +static set triggerdEvt; struct GroupReadFormat { __u64 nr; @@ -52,6 +61,10 @@ struct GroupReadFormat { int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &swtichData) { + if(useBpf) { + return ReadFromBpf(data); + } + if (__glibc_unlikely(this->fd < 0)) { this->accumCount.clear(); return UNKNOWN_ERROR; @@ -68,67 +81,40 @@ int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &data) +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { - ReadFormat perfCountValue; - int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); - if (len < 0) { - New(UNKNOWN_ERROR, strerror(errno)); - return UNKNOWN_ERROR; - } - if (accumCount.empty()) { - accumCount.assign(1, 0); - } - - int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, - perfCountValue.timeRunning, accumCount[0], data); - if (err != SUCCESS) { - return err; - } - - this->enabled = perfCountValue.timeEnabled; - this->running = perfCountValue.timeRunning; - return SUCCESS; + return vfprintf(stderr, format, args); } -int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +int KUNPENG_PMU::PerfCounter::InitPidForEvent() { - // Fixme: - // In current class, we do not know how many events in group. 
-    // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id)
-    static const unsigned MAX_GROUP_EVENTS = 14;
-    unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS;
-    GroupReadFormat *perfCountValue = static_cast<GroupReadFormat*>(malloc(readSize));
-    if (perfCountValue == NULL) {
-        return COMMON_ERR_NOMEM;
+    if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) {
+        return SUCCESS;
     }
-    int len = read(this->fd, perfCountValue, readSize);
-    if (len < 0) {
-        free(perfCountValue);
-        New(UNKNOWN_ERROR, strerror(errno));
-        return UNKNOWN_ERROR;
+
+    auto findObj = counterMap.find(this->evt->name);
+    if(findObj == counterMap.end()){
+        return -1;
     }
-
-    if (accumCount.empty()) {
-        accumCount.assign(perfCountValue->nr, 0);
+    struct bpf_perf_event_value evtVal[MAX_CPU_NUM];
+    memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
+    int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
+        sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem event value in InitPidForEvent. err: %d\n", err);
+        return -1;
     }
-
-    for (int i = 0;i < accumCount.size(); ++i) {
-        auto err = CountValueToData(perfCountValue->values[i].value,
-                                    perfCountValue->timeEnabled,
-                                    perfCountValue->timeRunning,
-                                    accumCount[i],
-                                    data
-        );
-        if (err != SUCCESS) {
-            free(perfCountValue);
-            return err;
-        }
+    err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem filter. err: %d\n", err);
+        return -1;
     }
-
-    this->enabled = perfCountValue->timeEnabled;
-    this->running = perfCountValue->timeRunning;
-    free(perfCountValue);
+
+    printf("InitPidForEvent: %d\n", pid);
+
+    evtPidMap[this->evt->name].insert(this->pid);
     return SUCCESS;
 }
@@ -171,7 +157,22 @@ int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 ti
  */
 int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd)
 {
-    return this->MapPerfAttr(groupEnable, groupFd);
+    if(useBpf){
+        InitPidForEvent();
+        auto findCpuMap = evtCpuMap.find(this->evt->name);
+        if(findCpuMap != evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end()) {
+            return SUCCESS;
+        }
+    }
+
+    int err = this->MapPerfAttr(groupEnable, groupFd);
+    if(err != SUCCESS){
+        return err;
+    }
+    if(useBpf){
+        err = InitBpfObj();
+    }
+    return err;
 }
 
 int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd)
@@ -192,7 +193,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou
      * We want to set the disabled and inherit bit to collect child processes
      */
     attr.disabled = 1;
-    attr.inherit = 1;
+    //attr.inherit = 1;
 
     /**
      * For now we set the format id bit to implement grouping logic in the future
@@ -221,12 +222,16 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou
         this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0);
 #endif
     } else {
-        this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0);
+        if(useBpf){
+            this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, flags);
+        }else {
+            this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, flags);
+        }
         }
         groupStatus = GroupStatus::NO_GROUP;
     }
     this->groupFd = groupFd;
-    DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
+    printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
         attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd);
     if (__glibc_unlikely(this->fd < 0)) {
         return MapErrno(errno);
diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h
index 8937bdb..8392282 100644
--- a/pmu/perf_counter.h
+++ b/pmu/perf_counter.h
@@ -43,6 +43,8 @@ namespace KUNPENG_PMU {
         int Disable() override;
         int Reset() override;
 
+        int BeginRead() override;
+        int EndRead() override;
     private:
         enum class GroupStatus {
@@ -56,7 +58,12 @@ namespace KUNPENG_PMU {
         int ReadSingleEvent(std::vector<PmuData> &data);
         int ReadGroupEvents(std::vector<PmuData> &data);
 
+        int InitBpfObj();
+        int ReadFromBpf(std::vector<PmuData> &data);
+        int InitPidForEvent();
+
         // Accumulated pmu count, time enabled and time running.
+        __u64 count = 0;
         __u64 enabled = 0;
         __u64 running = 0;
         // For group events, is the accum counts of all members.
diff --git a/pmu/perf_counter_bpf.cpp b/pmu/perf_counter_bpf.cpp
new file mode 100644
index 0000000..016b6dc
--- /dev/null
+++ b/pmu/perf_counter_bpf.cpp
@@ -0,0 +1,339 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
+ * libkperf licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *     http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: Mr.Gan
+ * Create: 2024-04-03
+ * Description: implementations for reading performance counters and initializing counting logic in
+ * the KUNPENG_PMU namespace.
+ ******************************************************************************/
+#include <climits>
+#include <cstdio>
+#include <cstring>
+#include <vector>
+#include <map>
+#include <set>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "pmu.h"
+#include "linked_list.h"
+#include "pfm_event.h"
+#include "pmu_event.h"
+#include "pcerr.h"
+#include "log.h"
+#include "sched_counter.skel.h"
+#include "perf_counter.h"
+
+using namespace std;
+using namespace pcerr;
+
+static constexpr int MAX_ATTR_SIZE = 120;
+static map<string, sched_counter_bpf*> counterMap;
+
+static map<string, int> bpfFdMap;
+static map<string, set<int>> evtCpuMap;
+static map<string, set<pid_t>> evtPidMap;
+static set<string> triggerdEvt;
+
+static inline int TriggeredRead(int prog_fd, int cpu)
+{
+    DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+        .ctx_in = NULL,
+        .ctx_size_in = 0,
+        .retval = 0,
+        .flags = BPF_F_TEST_RUN_ON_CPU,
+        .cpu = cpu,
+    );
+    return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
+int KUNPENG_PMU::PerfCounter::BeginRead()
+{
+    return SUCCESS;
+}
+
+int KUNPENG_PMU::PerfCounter::EndRead()
+{
+    triggerdEvt.clear();
+    return SUCCESS;
+}
+
+int KUNPENG_PMU::PerfCounter::ReadFromBpf(vector<PmuData> &data)
+{
+    // NOTE: the lookup below returns this pid's counts for every CPU at once, so
+    // only a single counter instance (arbitrarily, the one bound to cpu 2) reads.
+    if(cpu != 2) {
+        return SUCCESS;
+    }
+    const unsigned cpuNums = MAX_CPU_NUM;
+    auto obj = counterMap[this->evt->name];
+
+    if (triggerdEvt.find(this->evt->name) == triggerdEvt.end()) {
+        for(int i = 0; i < cpuNums; i++) {
+            auto triggerErr = TriggeredRead(bpfFdMap[this->evt->name], i);
+            if (triggerErr) {
+                printf("trigger %s\n", strerror(-triggerErr));
+            }
+        }
+        triggerdEvt.insert(this->evt->name);
+    }
+
+    struct bpf_perf_event_value values[cpuNums];
+    int err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY);
+    if(err){
+        printf("failed to lookup elem. err: %s pid %d\n", strerror(-err), this->pid);
+        return SUCCESS;
+    }
+
+    int processId = 0;
+    auto findProc = procMap.find(this->pid);
+    if(findProc != procMap.end()){
+        processId = findProc->second->pid;
+    }
+    for(int i = 0; i < cpuNums; i++) {
+        data.emplace_back(PmuData{0});
+        auto& current = data.back();
+        current.count = values[i].counter;
+        current.cpu = i;
+        current.tid = this->pid;
+        current.pid = processId;
+    }
+
+    memset(values, 0, MAX_CPU_NUM*sizeof(bpf_perf_event_value));
+    err = bpf_map__update_elem(obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY);
+    if(err){
+        printf("failed to bpf_map__update_elem event value. err: %s pid %d\n", strerror(-err), this->pid);
+        return -1;
+    }
+    return SUCCESS;
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+    return vfprintf(stderr, format, args);
+}
+
+int KUNPENG_PMU::PerfCounter::InitPidForEvent()
+{
+    if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) {
+        return SUCCESS;
+    }
+
+    auto findObj = counterMap.find(this->evt->name);
+    if(findObj == counterMap.end()){
+        return -1;
+    }
+
+    struct bpf_perf_event_value evtVal[MAX_CPU_NUM];
+    memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
+    int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
+        sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem event value in InitPidForEvent. err: %d\n", err);
+        return -1;
+    }
+
+    err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem filter. err: %d\n", err);
+        return -1;
+    }
+
+    printf("InitPidForEvent: %d\n", pid);
+
+    evtPidMap[this->evt->name].insert(this->pid);
+    return SUCCESS;
+}
+
+int KUNPENG_PMU::PerfCounter::InitBpfObj()
+{
+    int err;
+    struct sched_counter_bpf *obj;
+    auto findObj = counterMap.find(evt->name);
+    if(findObj == counterMap.end()){
+        obj = sched_counter_bpf__open();
+        if(!obj){
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.events, MAX_CPU_NUM);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.prev_readings, 1);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.accum_readings, 1024);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.filter, 1024);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+
+        err = sched_counter_bpf__load(obj);
+        if(err){
+            printf("failed to bpf load");
+            return -1;
+        }
+        err = sched_counter_bpf__attach(obj);
+        if(err){
+            printf("failed to bpf attach");
+            return -1;
+        }
+
+        counterMap[this->evt->name] = obj;
+        InitPidForEvent();
+
+        int progFd = bpf_program__fd(obj->progs.on_switch);
+        bpfFdMap[this->evt->name] = progFd;
+        printf("create bpf obj for evt %s prog fd %d\n", evt->name.c_str(), progFd);
+    }else {
+        obj = counterMap[this->evt->name];
+    }
+
+    err = bpf_map__update_elem(obj->maps.events, &this->cpu, sizeof(__u32), &this->fd, sizeof(int), BPF_ANY);
+    if (err) {
+        printf("failed to update elem. err: %s cpu %d fd %d\n", strerror(-err), cpu, fd);
+        return -1;
+    }
+
+    evtCpuMap[this->evt->name].insert(this->cpu);
+
+    return 0;
+}
+
+/**
+ * Initialize counting
+ */
+int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd)
+{
+    if(useBpf){
+        InitPidForEvent();
+        auto findCpuMap = evtCpuMap.find(this->evt->name);
+        if(findCpuMap != evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end()) {
+            return SUCCESS;
+        }
+    }
+
+    int err = this->MapPerfAttr(groupEnable, groupFd);
+    if(err != SUCCESS){
+        return err;
+    }
+    if(useBpf){
+        err = InitBpfObj();
+    }
+    return err;
+}
+
+int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd)
+{
+    /**
+     * For now, we only implemented the logic for CORE type events. Support for UNCORE PMU events will be
+     * added soon
+     */
+    struct perf_event_attr attr;
+    memset(&attr, 0, sizeof(attr));
+    attr.size = sizeof(struct perf_event_attr);
+    attr.type = this->evt->type;
+    attr.config = this->evt->config;
+    attr.config1 = this->evt->config1;
+    attr.config2 = this->evt->config2;
+
+    /**
+     * We want to set the disabled and inherit bit to collect child processes
+     */
+    attr.disabled = 1;
+    //attr.inherit = 1;
+
+    /**
+     * For now we set the format id bit to implement grouping logic in the future
+     */
+    attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
+    if (groupEnable) {
+        /*
+         * when creating an event group, typically the group leader is initialized with disabled bit set to 1,
+         * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0,
+         * the child events will not start counting until the group leader is enabled.
+         */
+        if (groupFd != -1) {
+            attr.disabled = 0;
+            groupStatus = GroupStatus::GROUP_MEMBER;
+        } else {
+            groupStatus = GroupStatus::GROUP_LEADER;
+        }
+        attr.read_format |= PERF_FORMAT_GROUP;
+        this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0);
+    } else {
+#ifdef IS_X86
+        if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) {
+            this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0);
+#else
+        if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) {
+            this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0);
+#endif
+        } else {
+            if(useBpf){
+                this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, flags);
+            }else {
+                this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, flags);
+            }
+        }
+        groupStatus = GroupStatus::NO_GROUP;
+    }
+    this->groupFd = groupFd;
+    printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
+        attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd);
+    if (__glibc_unlikely(this->fd < 0)) {
+        return MapErrno(errno);
+    }
+    return SUCCESS;
+}
+
+/**
+ * Enable
+ */
+int KUNPENG_PMU::PerfCounter::Enable()
+{
+    if (groupFd != -1) {
+        // Only group leader should use ioctl to enable, disable or reset,
+        // otherwise each event in the group will be collected for different durations.
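+        // (Group members are opened with disabled = 0 and start/stop with their
+        //  leader, so a member simply returns here without its own ioctl.)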
+ return SUCCESS; + } + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + this->accumCount.clear(); + this->enabled = 0; + this->running = 0; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/perf_counter_default.cpp b/pmu/perf_counter_default.cpp new file mode 100644 index 0000000..8a92fea --- /dev/null +++ b/pmu/perf_counter_default.cpp @@ -0,0 +1,287 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Gan + * Create: 2024-04-03 + * Description: implementations for reading performance counters and initializing counting logic in + * the KUNPENG_PMU namespace. + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include "pmu.h" +#include "linked_list.h" +#include "pfm_event.h" +#include "pmu_event.h" +#include "pcerr.h" +#include "log.h" +#include "perf_counter.h" + +using namespace std; +using namespace pcerr; + +static constexpr int MAX_ATTR_SIZE = 120; + +struct GroupReadFormat { + __u64 nr; + __u64 timeEnabled; + __u64 timeRunning; + struct { + __u64 value; + __u64 id; + } values[]; +}; + +/** + * Read pmu counter and deal with pmu multiplexing + * Right now we do not implement grouping logic, thus we ignore the + * PERF_FORMAT_ID section for now + */ +int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, + std::vector &extPool, std::vector &swtichData) +{ + if (__glibc_unlikely(this->fd < 0)) { + this->accumCount.clear(); + return UNKNOWN_ERROR; + } + + if (groupStatus == GroupStatus::NO_GROUP) { + return ReadSingleEvent(data); + } else if (groupStatus == GroupStatus::GROUP_LEADER) { + return ReadGroupEvents(data); + } + + // Group members do not need to read counters, + // Group leader will read them all. + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +{ + ReadFormat perfCountValue; + int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); + if (len < 0) { + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + if (accumCount.empty()) { + accumCount.assign(1, 0); + } + + int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, + perfCountValue.timeRunning, accumCount[0], data); + if (err != SUCCESS) { + return err; + } + + this->enabled = perfCountValue.timeEnabled; + this->running = perfCountValue.timeRunning; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +{ + // Fixme: + // In current class, we do not know how many events in group. 
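+    // (Sketch of the layout: with PERF_FORMAT_GROUP one read() on the leader fd
+    //  returns { nr, time_enabled, time_running, { value, id } * nr }, so reading
+    //  up to 14 events needs at most 3*8 + 14*2*8 = 248 bytes.)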
+ // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id) + static const unsigned MAX_GROUP_EVENTS = 14; + unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS; + GroupReadFormat *perfCountValue = static_cast(malloc(readSize)); + if (perfCountValue == NULL) { + return COMMON_ERR_NOMEM; + } + int len = read(this->fd, perfCountValue, readSize); + if (len < 0) { + free(perfCountValue); + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + + if (accumCount.empty()) { + accumCount.assign(perfCountValue->nr, 0); + } + + for (int i = 0;i < accumCount.size(); ++i) { + auto err = CountValueToData(perfCountValue->values[i].value, + perfCountValue->timeEnabled, + perfCountValue->timeRunning, + accumCount[i], + data + ); + if (err != SUCCESS) { + free(perfCountValue); + return err; + } + } + + this->enabled = perfCountValue->timeEnabled; + this->running = perfCountValue->timeRunning; + free(perfCountValue); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, vector &data) +{ + if (value < accumCount || timeEnabled < enabled || timeRunning < running) { + return LIBPERF_ERR_COUNT_OVERFLOW; + } + + // Calculate the diff of count from last read. + // In case of multiplexing, we follow the linux documentation for calculating the estimated + // counting value (https://perf.wiki.kernel.org/index.php/Tutorial) + double percent = 0.0; + uint64_t increCount; + if ((value == accumCount) || (timeRunning == running)) { + percent = -1; + increCount = 0; + } else { + percent = static_cast(timeEnabled - enabled) / static_cast(timeRunning - running); + increCount = static_cast((value - accumCount)* percent); + } + accumCount = value; + + data.emplace_back(PmuData{0}); + auto& current = data.back(); + current.count = increCount; + current.countPercent = 1.0 / percent; + current.cpu = this->cpu; + current.tid = this->pid; + auto findProc = procMap.find(current.tid); + if (findProc != procMap.end()) { + current.pid = findProc->second->pid; + } + return SUCCESS; +} + +/** + * Initialize counting + */ +int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +{ + if(useBpf){ + InitPidForEvent(); + auto findCpuMap = evtCpuMap.find(this->evt->name); + if(findCpuMap!=evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end()) { + return SUCCESS; + } + } + + int err = this->MapPerfAttr(groupEnable, groupFd); + if(err!=SUCCESS){ + return err; + } + if(useBpf){ + err = InitBpfObj(); + } + return err; +} + +int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) +{ + /** + * For now, we only implemented the logic for CORE type events. 
Support for UNCORE PMU events will be + * added soon + */ + struct perf_event_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = this->evt->type; + attr.config = this->evt->config; + attr.config1 = this->evt->config1; + attr.config2 = this->evt->config2; + + /** + * We want to set the disabled and inherit bit to collect child processes + */ + attr.disabled = 1; + //attr.inherit = 1; + + /** + * For now we set the format id bit to implement grouping logic in the future + */ + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + if (groupEnable) { + /* + * when creating an event group, typically the group leader is initialized with disabled bit set to 1, + * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0, + * the child events will not start counting until the group leader is enabled. + */ + + if (groupFd != -1) { + attr.disabled = 0; + groupStatus = GroupStatus::GROUP_MEMBER; + } else { + groupStatus = GroupStatus::GROUP_LEADER; + } + attr.read_format |= PERF_FORMAT_GROUP; + this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); + } else { +#ifdef IS_X86 + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#else + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#endif + } else { + this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, flags); + } + groupStatus = GroupStatus::NO_GROUP; + } + this->groupFd = groupFd; + printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); + if (__glibc_unlikely(this->fd < 0)) { + return MapErrno(errno); + } + return SUCCESS; +} + +/** + * Enable + */ +int KUNPENG_PMU::PerfCounter::Enable() +{ + if (groupFd != -1) { + // Only group leader should use ioctl to enable, disable or reset, + // otherwise each event in the group will be collected for different durations. + return SUCCESS; + } + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + this->accumCount.clear(); + this->enabled = 0; + this->running = 0; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 30de9ef..d394a30 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -807,10 +807,19 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { - // For counting with pid list for system wide, open fd with cpu -1 and specific pid. - taskParam->numCpu = 1; - taskParam->cpuList = new int[taskParam->numCpu]; - taskParam->cpuList[0] = -1; + if(useBpf) { + taskParam->numCpu = MAX_CPU_NUM; + taskParam->cpuList = new int[MAX_CPU_NUM]; + for(int i = 0; i < MAX_CPU_NUM; i++) { + taskParam->cpuList[i] = i; + } + } else { + // For counting with pid list for system wide, open fd with cpu -1 and specific pid. 
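+            // (BPF mode above instead enumerates cpu 0..MAX_CPU_NUM-1: the
+            //  sched_switch program reads the perf fd of the CPU it runs on, so a
+            //  single cpu = -1 fd would give it nothing to read per core.)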
            taskParam->numCpu = 1;
+            taskParam->cpuList = new int[taskParam->numCpu];
+            taskParam->cpuList[0] = -1;
+        }
+
     } else if (attr->cpuList == nullptr) {
         // For null cpulist, open fd with cpu 0,1,2...max_cpu
         const set<int> &onLineCpus = GetOnLineCpuIds();
diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp
index 9e8feb0..de1868e 100644
--- a/pmu/pmu_list.cpp
+++ b/pmu/pmu_list.cpp
@@ -179,10 +179,10 @@ namespace KUNPENG_PMU {
                 return err;
             }
 
-            err = AddToEpollFd(pd, evtList);
-            if (err != SUCCESS) {
-                return err;
-            }
+            // err = AddToEpollFd(pd, evtList);
+            // if (err != SUCCESS) {
+            //     return err;
+            // }
 
             return SUCCESS;
         }
diff --git a/pmu/sched_counter.bpf.c b/pmu/sched_counter.bpf.c
new file mode 100644
index 0000000..76cd700
--- /dev/null
+++ b/pmu/sched_counter.bpf.c
@@ -0,0 +1,93 @@
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL"; // license declaration for the kernel verifier
+
+#define MAX_ENTRIES 102400
+
+struct {
+    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); // interacts with user space in real time
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(int));
+    __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} events SEC(".maps");
+
+struct {
+    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(struct bpf_perf_event_value));
+    __uint(max_entries, 1);
+} prev_readings SEC(".maps");
+
+struct {
+    __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(struct bpf_perf_event_value));
+    __uint(max_entries, 1024);
+} accum_readings SEC(".maps");
+
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(__u32));
+    __uint(max_entries, MAX_ENTRIES);
+    __uint(map_flags, BPF_F_NO_PREALLOC);
+} filter SEC(".maps");
+
+// hook point of the program; the custom counting logic runs on every sched_switch
+SEC("raw_tp/sched_switch")
+int BPF_PROG(on_switch) {
+    __u32 pid;
+    __u32 zero = 0;
+    __u32 *accum_key;
+    __u32 cpu = bpf_get_smp_processor_id();
+    long err;
+    struct bpf_perf_event_value cur_val, *prev_val, *accum_val;
+
+    prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
+    if(!prev_val){
+        bpf_printk("failed to bpf_map_lookup_elem prev_readings.\n");
+        return 0;
+    }
+
+    err = bpf_perf_event_read_value(&events, BPF_F_CURRENT_CPU, &cur_val, sizeof(struct bpf_perf_event_value));
+    if(err){
+        bpf_printk("failed to bpf_event_read_value: %d cpu %d\n", err, cpu);
+        return 0;
+    }
+    pid = bpf_get_current_pid_tgid() & 0xffffffff;
+    accum_key = bpf_map_lookup_elem(&filter, &pid);
+    if (!accum_key) {
+        return 0;
+    }
+
+    accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+    if (!accum_val) {
+        *prev_val = cur_val;
+        return 0;
+    }
+
+    // accumulate the delta since the previous switch into this pid's slot
+    accum_val->counter += cur_val.counter - prev_val->counter;
+    accum_val->enabled += cur_val.enabled - prev_val->enabled;
+    accum_val->running += cur_val.running - prev_val->running;
+    *prev_val = cur_val;
+    return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){
+    long err;
+    __u32 new_pid;
+    __u32 parent_pid;
+    __u32 *accum_key;
+    struct bpf_perf_event_value *accum_val;
+
+    parent_pid = bpf_get_current_pid_tgid() & 0xffffffff;
+    new_pid = task->pid;
+
+    bpf_printk("new pid: %d parent: %d\n", new_pid, parent_pid);
+    accum_key = bpf_map_lookup_elem(&filter, &parent_pid);
+    if(!accum_key){
+        return 0;
+    }
+
+    bpf_map_update_elem(&filter, &new_pid, accum_key, BPF_NOEXIST);
+    bpf_printk("add child: %d accum_key: %d\n", new_pid, *accum_key);
+    return 0;
+}
\ No newline at end of file
diff --git a/util/common.h b/util/common.h
index caa52a3..3e756eb 100644
--- a/util/common.h
+++ b/util/common.h
@@ -28,6 +28,8 @@
 #error "Only the x86_64 and aarch64 architecture are supported."
 #endif
 
+static bool useBpf = true;
+
 const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/";
 const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/";
-- 
Gitee

From 679f33344763d1d81555931eb1f6e9109feddd49 Mon Sep 17 00:00:00 2001
From: wuying39 <921169248@qq.com>
Date: Thu, 31 Jul 2025 14:30:09 +0800
Subject: [PATCH 2/6] revise bpf

---
 build/common.sh               |  4 +-
 pmu/bpf/perf_counter_bpf.cpp  | 64 +++++++++--------------
 pmu/bpf/sched_cgroup.bpf.c    | 97 +++++++++++------------------------
 pmu/bpf/sched_counter.bpf.c   | 27 ++++------
 pmu/pmu.cpp                   | 27 +++-------
 pmu/pmu_event.h               |  1 +
 pmu/pmu_list.cpp              | 54 +++++++++----------
 pmu/pmu_list.h                | 18 ++++---
 test/test_perf/CMakeLists.txt |  3 +-
 util/common.h                 |  9 ++--
 10 files changed, 118 insertions(+), 186 deletions(-)

diff --git a/build/common.sh b/build/common.sh
index d1fc3f1..6dc24f3 100644
--- a/build/common.sh
+++ b/build/common.sh
@@ -103,8 +103,8 @@ function build_skel_files() {
     local bpf_file_dir=$1
     local bpf_lib_dir=$2
 
-    bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}/vmlinux.h"
-    if [ -s "${bpf_lib_dir}vmlinux.h" ]; then
+    bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h"
+    if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then
         echo "The kernel header file generated : $(wc -l < "${VMLINUX_H}")"
     else
         echo "Generate vmlinux.h file failed."
diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp
index 03f2f12..ae9714b 100644
--- a/pmu/bpf/perf_counter_bpf.cpp
+++ b/pmu/bpf/perf_counter_bpf.cpp
@@ -183,7 +183,7 @@ int KUNPENG_PMU::PerfBpfCounter::Read(EventData &eventData)
 }
 
 int KUNPENG_PMU::PerfBpfCounter::ReadCgroup(std::vector<PmuData> &data) {
-    auto cgrpName = fdCgrp[pid];
+    auto cgrpName = this->evt->cgroupName;
     if (readCgroups.find(cgrpName) != readCgroups.end()) {
         return SUCCESS;
     }
@@ -212,6 +212,7 @@ int KUNPENG_PMU::PerfBpfCounter::ReadCgroup(std::vector<PmuData> &data) {
         current.countPercent = values[i].running / values[i].enabled;
         current.cpu = i;
         current.tid = this->pid;
+        current.cgroupName = this->evt->cgroupName.c_str();
     }
 
     memset(values, 0, cpuNums * sizeof(bpf_perf_event_value));
@@ -244,40 +245,19 @@ int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent()
 
     // initialize the cumulative pmu count for this pid
     struct bpf_perf_event_value evtVal[MAX_CPU_NUM];
-    if (!this->evt->cgroupName.empty()){
-        string cgroupPath = GetCgroupPath(this->evt->cgroupName);
-        auto pids = GetCgroupPids(cgroupPath);
-        for (pid_t pid : pids) {
-            memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
-            int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
-                sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
-            if(err){
-                printf("failed to bpf_map__update_elem event value in libbpf_print_fn. err: d \n", err);
-                return -1;
-            }
-
-            // initialize the filter, build the map relationship of pid and accum_key
-            err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
-            if(err){
-                printf("failed to bpf_map__update_elem filter. err: %d \n", err);
-                return -1;
-            }
-        }
-    } else {
-        memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
-        int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
-            sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
-        if(err){
-            printf("failed to bpf_map__update_elem event value in libbpf_print_fn. err: d \n", err);
-            return -1;
-        }
+    memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
+    int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
+        sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem event value in InitPidForEvent. err: %d\n", err);
+        return -1;
+    }
 
-        // initialize the filter, build the map relationship of pid and accum_key
-        err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
-        if(err){
-            printf("failed to bpf_map__update_elem filter. err: %d \n", err);
-            return -1;
-        }
+    // initialize the filter, build the map relationship of pid and accum_key
+    err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem filter. err: %d\n", err);
+        return -1;
     }
 
     printf("InitPidForEvent: %d\n", pid);
@@ -364,6 +344,7 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj()
     }
 
     obj->rodata->num_cpus = MAX_CPU_NUM;
+    obj->rodata->num_events = this->evt->numEvent;
 
     err = bpf_map__set_max_entries(obj->maps.events, MAX_ENTITES);
     if(err){
@@ -395,12 +376,8 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj()
         return -1;
     }
 
-    if (CheckCgroupV2()) {
-        obj->bss->use_cgroup_v2 = 1;
-    }
-
     err = sched_cgroup_bpf__attach(obj);
-    if(err){
+    if(err) {
         printf("failed to bpf attach");
         return -1;
     }
@@ -415,8 +392,15 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj()
         evtIdxMap[evt->name] = evtIdx;
         evtIdx++;
     }
+    int evtKey = evtIdxMap[evt->name] * MAX_CPU_NUM + cpu;
+    err = bpf_map__update_elem(cgrpCounter->maps.events, &evtKey, sizeof(__u32),
+        &this->fd, sizeof(int), BPF_ANY);
+    if(err){
+        printf("failed to update elem. 
err %s cpu %d fd %d\n", strerror(-err), cpu, fd); + return -1; + } - string cgrpName = fdCgrp[this->pid]; + string cgrpName = this->evt->cgroupName; auto findCgrp = cgroups.find(cgrpName); if(findCgrp == cgroups.end()) { uint64_t cgrpId = ReadCgroupId(cgrpName); diff --git a/pmu/bpf/sched_cgroup.bpf.c b/pmu/bpf/sched_cgroup.bpf.c index accfafe..08183da 100644 --- a/pmu/bpf/sched_cgroup.bpf.c +++ b/pmu/bpf/sched_cgroup.bpf.c @@ -51,10 +51,7 @@ struct { const volatile __u32 num_events = 1; const volatile __u32 num_cpus = 1; -int enabled = 0; -int use_cgroup_v2 = 0; - -static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) +static inline int get_cgroup_idx(__u32 *cgrps, int size) { struct task_struct *p = (void *)bpf_get_current_task(); struct cgroup *cgrp; @@ -65,6 +62,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup); level = BPF_CORE_READ(cgrp, level); + bpf_printk("perf_event_cgrp_id %d cgrp_id %d level %d", perf_event_cgrp_id, cgrp, level); for (cnt = 0; i < MAX_LEVELS; i++) { __u64 cgrp_id; @@ -80,9 +78,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) bpf_printk("cannot get ancestor_ids"); return 0; } - if (cgrp_id==33) { - bpf_printk("print before level: %d cgrp id: %ld\n", i ,cgrp_id); - } + bpf_printk("cgrp_id %d level %d", cgrp_id, level); elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); if (!elem) { continue; @@ -93,35 +89,10 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) break; } } - + bpf_printk("cnt %d", cnt); return cnt; } -static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) -{ - // register int i = 0; - // __u32 *elem; - // int cnt; - - // for (cnt = 0; i < MAX_LEVELS; i++) { - // __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); - - // if (cgrp_id == 0) - // break; - - // // convert cgroup-id to a map index - // elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); - // if (!elem) - // continue; - - // cgrps[cnt++] = *elem; - // if (cnt == size) - // break; - // } - - return 0; -} - static int bperf_cgroup_count(void) { register __u32 idx = 0; // to have it in a register to pass BPF verifier @@ -133,13 +104,7 @@ static int bperf_cgroup_count(void) __u32 key, cgrp; long err; - if (use_cgroup_v2) { - bpf_printk("-------cgroup v2--------------"); - cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); - } else { - bpf_printk("-------cgroup v1--------------"); - cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); - } + cgrp_cnt = get_cgroup_idx(cgrp_idx, MAX_LEVELS); for ( ; idx < MAX_EVENTS; idx++) { bpf_printk("idx: %d num_events: %d", idx, num_events); @@ -161,7 +126,7 @@ static int bperf_cgroup_count(void) return 0; } } - + bpf_printk("prev_val counting: %ld prev_val enabled: %ld\n", prev_val->counter, prev_val->enabled); // read from global perf_event array key = idx * num_cpus + cpu; bpf_printk("key: %d", key); @@ -171,34 +136,30 @@ static int bperf_cgroup_count(void) continue; } - if (enabled) { - delta.counter = val.counter - prev_val->counter; - delta.enabled = val.enabled - prev_val->enabled; - delta.running = val.running - prev_val->running; - - for (c = 0; c < MAX_LEVELS; c++) { - if (c == cgrp_cnt) - break; - - cgrp = cgrp_idx[c]; - bpf_printk("c: %d cgrp: %d", c, cgrp); - // aggregate the result by cgroup - key = cgrp * num_events + idx; - cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key); - if (cgrp_val) { - cgrp_val->counter += delta.counter; - cgrp_val->enabled += delta.enabled; - cgrp_val->running += delta.running; - 
bpf_printk("cgrp_val counting: %ld cgrp_val counting: %ld\n", cgrp_val->counter, cgrp_val->counter); - } else { - bpf_printk("!cgrp_val"); - bpf_map_update_elem(&cgrp_readings, &key, - &delta, BPF_ANY); - } + delta.counter = val.counter - prev_val->counter; + delta.enabled = val.enabled - prev_val->enabled; + delta.running = val.running - prev_val->running; + bpf_printk("val counting: %ld val enabled: %ld\n", val.counter, val.enabled); + bpf_printk("delta counting: %ld delta enabled: %ld\n", delta.counter, delta.enabled); + for (c = 0; c < MAX_LEVELS; c++) { + if (c == cgrp_cnt) + break; + + cgrp = cgrp_idx[c]; + // aggregate the result by cgroup + key = cgrp * num_events + idx; + bpf_printk("c: %d cgrp: %d key: %d", c, cgrp, key); + cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key); + if (cgrp_val) { + cgrp_val->counter += delta.counter; + cgrp_val->enabled += delta.enabled; + cgrp_val->running += delta.running; + bpf_printk("cgrp_val counting: %ld cgrp_val counting: %ld\n", cgrp_val->counter, cgrp_val->counter); + } else { + bpf_printk("!cgrp_val"); + bpf_map_update_elem(&cgrp_readings, &key, + &delta, BPF_ANY); } - } else { - bpf_printk("not enabled"); - return 0; } *prev_val = val; diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c index 0bf418c..917cb01 100644 --- a/pmu/bpf/sched_counter.bpf.c +++ b/pmu/bpf/sched_counter.bpf.c @@ -1,21 +1,22 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +// Copyright (c) 2021 Google #include "bpf/vmlinux.h" #include "bpf/bpf_core_read.h" #include "bpf/bpf_tracing.h" #include "bpf/bpf_helpers.h" -char LICENSE[] SEC("license") = "GPL"; //license for kernel verification +char LICENSE[] SEC("license") = "GPL"; #define MAX_ENTRIES 102400 -// system pmu count. key: pid, value : count of each core struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); // map for perfbuffer + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(key_size, sizeof(__u32)); __uint(value_size, sizeof(int)); __uint(map_flags, BPF_F_PRESERVE_ELEMS); } events SEC(".maps"); -//system pmu count at last time sched_switch was triggered struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -23,8 +24,6 @@ struct { __uint(max_entries, 1); } prev_readings SEC(".maps"); -// accumulated pmu count of pid. key: accum_key, value: count of each core -// If the pid spawns a child process/thread, both use the same accum key and their pmu events sum under this pid's profile struct { __uint(type, BPF_MAP_TYPE_PERCPU_HASH); __uint(key_size, sizeof(__u32)); @@ -32,7 +31,6 @@ struct { __uint(max_entries, 1024); } accum_readings SEC(".maps"); -// check whether to record pmu value. 
key: pid, value: accum_key struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); @@ -43,11 +41,10 @@ struct { SEC("raw_tp/sched_switch") int BPF_PROG(on_switch) { - bpf_printk("----------------sched_switch------------"); __u32 pid; - __u32 zero = 0; + __u32 zero=0; __u32 *accum_key; - __u32 cpu = bpf_get_smp_processor_id(); // get current cpu + __u32 cpu = bpf_get_smp_processor_id(); long err; struct bpf_perf_event_value cur_val, *prev_val, *accum_val; @@ -57,25 +54,19 @@ int BPF_PROG(on_switch) { return 0; } - // get pmu value by API of bpf err = bpf_perf_event_read_value(&events, BPF_F_CURRENT_CPU, &cur_val, sizeof(struct bpf_perf_event_value)); if(err){ bpf_printk("failed to bpf_event_read_value: %d cpu %d\n", err, cpu); return 0; } - - pid = bpf_get_current_pid_tgid() & 0xffffffff; // get current pid - bpf_printk("bpf_get_current_pid_tgid: %ld pid: %d cpu: %d\n", bpf_get_current_pid_tgid(), pid, cpu); - + pid = bpf_get_current_pid_tgid() & 0xffffffff; accum_key = bpf_map_lookup_elem(&filter, &pid); - if (!accum_val) { - bpf_printk("!accum_key\n"); + if (!accum_key) { return 0; } accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); if (!accum_val) { - bpf_printk("!accum_val\n"); *prev_val = cur_val; return 0; } diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 8784d37..3389a54 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -335,8 +335,6 @@ static void CopyAttrData(PmuAttr* newAttr, PmuAttr* inputAttr, enum PmuTaskType } } -std::map fdCgrp; - static bool FreeEvtAttr(struct PmuAttr *attr) { if (attr->evtAttr == nullptr) { @@ -909,23 +907,13 @@ int GetCgroupFd(std::string& cgroupName) { static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int groupId, const char* cgroupName, int cgroupFd) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); - - if(attr->cgroupNameList) { - taskParam->numPid = 1; - taskParam->pidList = new int[1]; - string cgroupNameStr = cgroupName; - taskParam->pidList[0] = GetCgroupFd(cgroupNameStr); - fdCgrp[taskParam->pidList[0]] = cgroupNameStr; - std::cout<<"pid: "<< taskParam->pidList[0]<< "cgroupName: "<numPid = attr->numPid; - taskParam->pidList = new int[attr->numPid]; - for (int i = 0; i < attr->numPid; i++) { - taskParam->pidList[i] = attr->pidList[i]; - } + /** + * Assign pids to collect + */ + taskParam->numPid = attr->numPid; + taskParam->pidList = new int[attr->numPid]; + for (int i = 0; i < attr->numPid; i++) { + taskParam->pidList[i] = attr->pidList[i]; } PmuEvt* pmuEvt = nullptr; @@ -968,6 +956,7 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att if (cgroupName != nullptr) { taskParam->pmuEvt->cgroupName = cgroupName; } + taskParam->pmuEvt->numEvent = attr->numEvt; return taskParam.release(); } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 557b7a8..88aed9d 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -51,6 +51,7 @@ struct PmuEvt { unsigned includeNewFork : 1; // count new fork tid int cgroupFd; std::string cgroupName; + unsigned numEvent; }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index f41aed6..b046cc1 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -151,16 +151,16 @@ namespace KUNPENG_PMU { fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); needBytesNum += PredictRequiredMemory(taskParam->pmuEvt->collectType, cpuTopoList.size(), procTopoList.size()); - if(useBpf){ + #ifdef BPF_ENABLED std::shared_ptr evtList = - 
std::make_shared(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); - InsertBpfEvtList(pd, evtList); - }else { + std::make_shared(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); + InsertBpfEvtList(pd, evtList); + #else std::shared_ptr evtList = std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); InsertEvtList(pd, evtList); - } + #endif pmuTaskAttrHead = pmuTaskAttrHead->next; } @@ -194,6 +194,7 @@ namespace KUNPENG_PMU { return SUCCESS; } +#ifdef BPF_ENABLED int PmuList::EvtBpfInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList, bool isMemoryEnough) { auto err = evtList->Init(groupEnable, evtLeader, isMemoryEnough); @@ -203,11 +204,24 @@ namespace KUNPENG_PMU { return SUCCESS; } + void PmuList::InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList) + { + lock_guard lg(pmuListMtx); + pmuBpfList[pd].push_back(evtList); + } + + std::vector>& PmuList::GetBpfEvtList(const unsigned pd) + { + lock_guard lg(pmuListMtx); + return pmuBpfList[pd]; + } +#endif + int PmuList::Init(const int pd, bool isMemoryEnough) { std::unordered_map eventGroupInfoMap; - if(useBpf) { + #ifdef BPF_ENABLED for (auto& evtList : GetBpfEvtList(pd)) { auto err = EvtBpfInit(false, nullptr, pd, evtList, isMemoryEnough); if (err != SUCCESS) { @@ -215,7 +229,7 @@ namespace KUNPENG_PMU { } continue; } - } + #endif for (auto& evtList : GetEvtList(pd)) { if (evtList->GetGroupId() == -1) { @@ -270,7 +284,7 @@ namespace KUNPENG_PMU { int PmuList::Start(const int pd) { - if(useBpf) { + #ifdef BPF_ENABLED auto pmuList = GetBpfEvtList(pd); for (auto item: pmuList) { auto err = item->Start(); @@ -278,7 +292,7 @@ namespace KUNPENG_PMU { return err; } } - } else { + #else auto pmuList = GetEvtList(pd); for (auto item: pmuList) { auto err = item->Start(); @@ -286,8 +300,7 @@ namespace KUNPENG_PMU { return err; } } - } - + #endif return SUCCESS; } @@ -440,7 +453,7 @@ namespace KUNPENG_PMU { evtData.pd = pd; evtData.collectType = static_cast(GetTaskType(pd)); auto ts = GetCurrentTime(); - if(useBpf) { + #ifdef BPF_ENABLED auto eventList = GetBpfEvtList(pd); for (auto item: eventList) { item->SetTimeStamp(ts); @@ -449,7 +462,7 @@ namespace KUNPENG_PMU { return err; } } - } else { + #else auto eventList = GetEvtList(pd); for (auto item: eventList) { item->SetTimeStamp(ts); @@ -458,8 +471,7 @@ namespace KUNPENG_PMU { return err; } } - } - + #endif return SUCCESS; } @@ -626,18 +638,6 @@ namespace KUNPENG_PMU { return pmuList[pd]; } - void PmuList::InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList) - { - lock_guard lg(pmuListMtx); - pmuBpfList[pd].push_back(evtList); - } - - std::vector>& PmuList::GetBpfEvtList(const unsigned pd) - { - lock_guard lg(pmuListMtx); - return pmuBpfList[pd]; - } - void PmuList::EraseEvtList(const unsigned pd) { lock_guard lg(pmuListMtx); diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 30a7091..b5e7dc8 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -23,8 +23,9 @@ #include "evt_list.h" #include "pmu_event.h" #include "evt_list_default.h" -#include "bpf/evt_list_bpf.h" - +#ifdef BPF_ENABLED + #include "bpf/evt_list_bpf.h" +#endif namespace KUNPENG_PMU { struct PmuTaskAttr { @@ -94,17 +95,20 @@ private: void InsertEvtList(const unsigned pd, std::shared_ptr evtList); std::vector>& GetEvtList(const unsigned pd); - void InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList); - std::vector>& GetBpfEvtList(const 
unsigned pd); - void EraseEvtList(const unsigned pd); void EraseParentEventMap(const unsigned pd); void EraseSymModeList(const unsigned pd); void ErasePpidList(const unsigned pd); int EvtDefaultInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList, bool isMemoryEnough); + +#ifdef BPF_ENABLED + void InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList); + std::vector>& GetBpfEvtList(const unsigned pd); int EvtBpfInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList, bool isMemoryEnough); - + std::unordered_map>> pmuBpfList; +#endif + int Init(const int pd, bool isMemoryEnough); void InsertDataEvtGroupList(const unsigned pd, groupMapPtr evtGroupList); @@ -147,7 +151,7 @@ private: static std::mutex dataEvtGroupListMtx; static std::mutex dataParentMtx; std::unordered_map>> pmuList; - std::unordered_map>> pmuBpfList; + // Key: pd // Value: PmuData List. // PmuData is stored here before user call . diff --git a/test/test_perf/CMakeLists.txt b/test/test_perf/CMakeLists.txt index 1eb6f63..e28f490 100644 --- a/test/test_perf/CMakeLists.txt +++ b/test/test_perf/CMakeLists.txt @@ -4,11 +4,12 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/pfm) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/analyzer/metric) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/decoder) +include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/bpf) include_directories(${PROJECT_TOP_DIR}/include) add_compile_options(-g) set(CMAKE_CXX_STANDARD 14) aux_source_directory(. SOURCE_SRC) add_executable(test_perf ${SOURCE_SRC} ${CMAKE_CURRENT_LIST_DIR}/../../util/pcerr.cpp) -target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread -g) +target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread bpf -g) add_subdirectory(case) diff --git a/util/common.h b/util/common.h index c395f0b..365ad78 100644 --- a/util/common.h +++ b/util/common.h @@ -29,12 +29,13 @@ #error "Only the x86_64 and aarch64 architecture are supported." 
#endif -static bool useBpf = false; -#ifdef USE_BPF - useBpf = true; +static bool useBpf = +#ifdef BPF_ENABLED + true; +#else + false; #endif -extern std::map<int, std::string> fdCgrp; #define CGROUP2_SUPER_MAGIC 0x63677270 const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; -- Gitee From fa87de82fc4798581926764b2e74be07f7e6b90e Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 6 Aug 2025 10:35:56 +0800 Subject: [PATCH 3/6] bpf dynamic --- include/pmu.h | 2 ++ pmu/bpf/evt_list_bpf.h | 5 +++++ pmu/pmu.cpp | 4 ++-- pmu/pmu_event.h | 1 + pmu/pmu_list.cpp | 16 ++++++---------- util/common.h | 7 ------- 6 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/pmu.h b/include/pmu.h index 89aed63..cf2d858 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -176,6 +176,8 @@ struct PmuAttr { // enable user access counting for current process unsigned enableUserAccess : 1; + // enable bpf collecting for counting mode + unsigned useBpf : 1; }; enum PmuTraceType { diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h index 482c37d..10ae130 100644 --- a/pmu/bpf/evt_list_bpf.h +++ b/pmu/bpf/evt_list_bpf.h @@ -66,6 +66,11 @@ public: return pmuEvt->collectType; } + int GetUseBpf() const + { + return pmuEvt->useBpf; + } + private: using PerfEvtPtr = std::shared_ptr; std::unordered_map procMap; diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 8858db0..2442e85 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -913,8 +913,8 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt for(int i = 0; i < pmuEvt->cpuMaskList.size(); i++) { taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } - } else if (attr->cpuList == nullptr && (attr->pidList != nullptr || attr->cgroupNameList) && pmuEvt->collectType == COUNTING) { - if(useBpf || attr->cgroupNameList) { + } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { //(attr->pidList != nullptr || attr->cgroupNameList) + if(attr->useBpf) { //|| attr->cgroupNameList taskParam->numCpu = MAX_CPU_NUM; taskParam->cpuList = new int[MAX_CPU_NUM]; for(int i = 0; i < MAX_CPU_NUM; i++) { diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 520d057..f9dbbdf 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -52,6 +52,7 @@ struct PmuEvt { int cgroupFd; std::string cgroupName; unsigned numEvent; + unsigned useBpf : 1; // bpf mode for counting }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 9d59dad..4695352 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -88,7 +88,7 @@ namespace KUNPENG_PMU { fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); #ifdef BPF_ENABLED - if (taskParam->pmuEvt->collectType == COUNTING) { + if (taskParam->pmuEvt->collectType == COUNTING && taskParam->pmuEvt->useBpf) { std::shared_ptr<EvtBpfList> evtList = std::make_shared<EvtBpfList>(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); InsertBpfEvtList(pd, evtList); @@ -1023,10 +1023,8 @@ namespace KUNPENG_PMU { int masterPid = pmuTaskAttrHead->pidList[i]; int numChild = 0; int* childTidList = GetChildTid(masterPid, &numChild); - if (!useBpf || pmuTaskAttrHead->pmuEvt->cgroupName.empty()) { - if (childTidList == nullptr) { - return LIBPERF_ERR_INVALID_PID; - } + if (childTidList == nullptr) { + return LIBPERF_ERR_INVALID_PID; } bool foundProc = false; for (int j = 0; j < numChild; j++) { @@ -1041,11 +1039,9 @@ namespace KUNPENG_PMU { procTopoList.emplace_back(shared_ptr<ProcTopology>(procTopo, FreeProcTopo)); } delete[] childTidList; - if (!useBpf || 
pmuTaskAttrHead->pmuEvt->cgroupName.empty()) { - if (!foundProc) { - New(LIBPERF_ERR_FAIL_GET_PROC, "process not found: " + std::to_string(pmuTaskAttrHead->pidList[i])); - return LIBPERF_ERR_FAIL_GET_PROC; - } + if (!foundProc) { + New(LIBPERF_ERR_FAIL_GET_PROC, "process not found: " + std::to_string(pmuTaskAttrHead->pidList[i])); + return LIBPERF_ERR_FAIL_GET_PROC; } } return SUCCESS; diff --git a/util/common.h b/util/common.h index 78e1584..98dec4e 100644 --- a/util/common.h +++ b/util/common.h @@ -31,13 +31,6 @@ #error "Only the x86_64, aarch64, and riscv64 architectures are supported." #endif -static bool useBpf = -#ifdef BPF_ENABLED - true; -#else - false; -#endif - #define CGROUP2_SUPER_MAGIC 0x63677270 const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; -- Gitee From f7051b75e8cf1363ba00ffcf56f8609dc21d4066 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 6 Aug 2025 15:07:37 +0800 Subject: [PATCH 4/6] revise --- build/common.sh | 2 +- pmu/bpf/evt_list_bpf.cpp | 95 ++++++++++++-------------- pmu/bpf/evt_list_bpf.h | 13 ++-- pmu/bpf/perf_counter_bpf.cpp | 129 ++++++++++++----------------------- pmu/bpf/perf_counter_bpf.h | 25 ++----- pmu/bpf/sched_cgroup.bpf.c | 13 ++-- pmu/bpf/sched_counter.bpf.c | 8 +-- pmu/perf_counter.h | 1 + pmu/perf_counter_default.cpp | 2 +- 9 files changed, 110 insertions(+), 178 deletions(-) diff --git a/build/common.sh b/build/common.sh index 6dc24f3..4f725fc 100644 --- a/build/common.sh +++ b/build/common.sh @@ -105,7 +105,7 @@ function build_skel_files() { local bpf_lib_dir=$2 bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h" if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then - echo "The kernel header file generated : $(wc -l < "${VMLINUX_H}")" + echo "The kernel header file generated." else echo "Generate vmlinux.h file failed." 
fi diff --git a/pmu/bpf/evt_list_bpf.cpp b/pmu/bpf/evt_list_bpf.cpp index 957ea19..7e1cc0b 100644 --- a/pmu/bpf/evt_list_bpf.cpp +++ b/pmu/bpf/evt_list_bpf.cpp @@ -39,11 +39,7 @@ int KUNPENG_PMU::EvtBpfList::CollectorDoTask(PerfEvtPtr collector, int task) case RESET: return collector->Reset(); case CLOSE: { - auto ret = collector->Close(); - if (ret == SUCCESS) { - fdList.erase(collector->GetFd()); - } - return ret; + return collector->Close(); } default: return UNKNOWN_ERROR; @@ -58,41 +54,44 @@ int KUNPENG_PMU::EvtBpfList::Init(const bool groupEnable, const std::shared_ptr< procMap[proc->tid] = proc; } } - for (unsigned int row = 0; row < numCpu; row++) { + + for (unsigned int cpu = 0; cpu < numCpu; cpu++) { int resetOutPutFd = -1; - std::vector<PerfEvtPtr> evtVec{}; PerfEvtPtr perfEvt; - for(unsigned int col=0;col<numPid;col++){ - perfEvt = std::make_shared<PerfBpfCounter>(this->cpuList[row]->coreId, this->pidList[col]->tid, this->pmuEvt.get(), procMap); - if (perfEvt == nullptr) { - continue; - } - - if (!evtVec.empty()) { - resetOutPutFd = evtVec[0]->GetFd(); - } - - int err = 0; - err = perfEvt->Init(groupEnable, -1, resetOutPutFd); - fdList.insert(perfEvt->GetFd()); - evtVec.emplace_back(perfEvt); + + perfEvt = std::make_shared<PerfBpfCounter>(this->cpuList[cpu]->coreId, -1, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; } - this->xyCounterArray.emplace_back(evtVec); - this->xyCounterArray[row].emplace_back(perfEvt); + int err = 0; + err = perfEvt->Init(groupEnable, -1, resetOutPutFd); + this->cpuCounterArray.emplace_back(perfEvt); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + int resetOutPutFd = -1; + PerfEvtPtr perfEvt; + + perfEvt = std::make_shared<PerfBpfCounter>(-1, this->pidList[pid]->tid, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + int err = 0; + err = perfEvt->Init(groupEnable, -1, resetOutPutFd); + this->pidCounterArray.emplace_back(perfEvt); } return SUCCESS; } -int KUNPENG_PMU::EvtBpfList::CollectorXYArrayDoTask(std::vector<std::vector<PerfEvtPtr>>& xyArray, int task) +int KUNPENG_PMU::EvtBpfList::CollectorTaskArrayDoTask(std::vector<PerfEvtPtr>& taskArray, int task) { std::unique_lock<std::mutex> lock(mutex); - for (auto row: xyArray) { - for (auto evt: row) { - auto err = CollectorDoTask(evt, task); - if (err != SUCCESS) { - return err; - } + for (auto evt: taskArray) { - auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; } } this->prevStat = this->evtStat; @@ -102,22 +101,22 @@ int KUNPENG_PMU::EvtBpfList::CollectorXYArrayDoTask(std::vector<std::vector<PerfEvtPtr>>& xyArray, int task) int KUNPENG_PMU::EvtBpfList::Start() { - return CollectorXYArrayDoTask(this->xyCounterArray, START); + return CollectorTaskArrayDoTask(this->cpuCounterArray, START); } int KUNPENG_PMU::EvtBpfList::Enable() { - return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); + return CollectorTaskArrayDoTask(this->cpuCounterArray, ENABLE); } int KUNPENG_PMU::EvtBpfList::Stop() { - return CollectorXYArrayDoTask(this->xyCounterArray, STOP); + return CollectorTaskArrayDoTask(this->cpuCounterArray, STOP); } int KUNPENG_PMU::EvtBpfList::Close() { - auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); + auto ret = CollectorTaskArrayDoTask(this->cpuCounterArray, CLOSE); if (ret != SUCCESS) { return ret; } @@ -130,38 +129,34 @@ int KUNPENG_PMU::EvtBpfList::Read(EventData &eventData) { std::unique_lock<std::mutex> lg(mutex); - for (unsigned int row = 0; row < numCpu; row++) { - for (unsigned int col = 0; col < numPid; col++) { - int err = this->xyCounterArray[row][col]->BeginRead(); - if (err != SUCCESS) { - return err; - } + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->BeginRead(); + if (err != SUCCESS) { + return err; } } struct 
PmuEvtData* head = nullptr; int row = 0; auto cpuTopo = this->cpuList[row].get(); - for (unsigned int col = 0; col < numPid; col++) { + for (unsigned int pid = 0; pid < numPid; pid++) { auto cnt = eventData.data.size(); - int err = this->xyCounterArray[row][col]->Read(eventData); + int err = this->pidCounterArray[pid]->Read(eventData); if (err != SUCCESS) { return err; } if (eventData.data.size() - cnt) { - DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[col]->pid, + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[pid]->pid, cpuTopo->coreId, eventData.data.size() - cnt); } // Fill event name and cpu topology. - FillFields(cnt, eventData.data.size(), cpuTopo, pidList[col].get(), eventData.data); + FillFields(cnt, eventData.data.size(), cpuTopo, pidList[pid].get(), eventData.data); } - for (unsigned int row = 0; row < numCpu; row++) { - for (unsigned int col = 0; col < numPid; col++) { - int err = this->xyCounterArray[row][col]->EndRead(); - if (err != SUCCESS) { - return err; - } + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->EndRead(); + if (err != SUCCESS) { + return err; } } diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h index ae6caaf..2962892 100644 --- a/pmu/bpf/evt_list_bpf.h +++ b/pmu/bpf/evt_list_bpf.h @@ -30,6 +30,7 @@ #include "spe_sampler.h" #include "evt_list.h" #include "evt_list_default.h" + namespace KUNPENG_PMU { class EvtBpfList : public EvtList { @@ -66,29 +67,23 @@ public: return pmuEvt->collectType; } - int GetUseBpf() const - { - return pmuEvt->useBpf; - } - private: using PerfEvtPtr = std::shared_ptr; std::unordered_map procMap; std::vector cpuList; std::vector pidList; - std::vector>> xyCounterArray; + std::vector> cpuCounterArray; + std::vector> pidCounterArray; std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); std::shared_ptr pmuEvt; int CollectorDoTask(PerfEvtPtr collector, int task); - int CollectorXYArrayDoTask(std::vector>& xyArray, int task); + int CollectorTaskArrayDoTask(std::vector& taskArray, int task); - std::set fdList; unsigned int numCpu = 0; unsigned int numPid = 0; int64_t ts = 0; int prevStat; int evtStat; - std::mutex mutex; }; } // namespace KUNPENG_PMU diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp index aa46730..b03aac5 100644 --- a/pmu/bpf/perf_counter_bpf.cpp +++ b/pmu/bpf/perf_counter_bpf.cpp @@ -42,22 +42,19 @@ using namespace pcerr; #define MAX_ENTITES 1024 static map counterMap; // key: evt name, value: bpf obj +static struct sched_cgroup_bpf *cgrpCounter = nullptr; // one bpf obj in cgroup mode static map bpfFdMap; // key: evt name, value: bpf prog fd (tracepoint id). For tracepoint trigger static map> evtCpuMap; // key: evt name, value: core id (Init). static map> evtPidMap; // key: evt name, value: pid (Init). static map> evtCgroupMap; // key: evt name, value: cgroup id (Init). 
- +static map<string, int> evtIdxMap; // key: evt name, value: sequential number +static map<string, int> cgroups; // key: cgroup name, value: sequential number +static set<string> readCgroups; static set<string> triggerdEvt; // triggered evt name - -static struct sched_cgroup_bpf *cgrpCounter = nullptr; -static map<string, int> evtIdxMap; static int evtIdx = 0; -static map<string, int> cgroups; -static set<string> readCgroups; static int cgrpProgFd = 0; - static inline int TriggeredRead(int prog_fd, int cpu) { //enforce the bpf trace function @@ -83,28 +80,7 @@ int KUNPENG_PMU::PerfBpfCounter::EndRead() return SUCCESS; } -static std::vector<pid_t> GetCgroupPids(const std::string& cgroup_path) { - std::vector<pid_t> pids; - std::ifstream file(cgroup_path + "/cgroup.procs"); - - if (!file.is_open()) { - std::cout<<"cannot cgroup.procs: " << cgroup_path <<std::endl; - return pids; - } - - pid_t pid; - while (file >> pid) { - pids.push_back(pid); - file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); - } - - if (!file.eof() && file.fail()) { - std::cout<<"read file cgroup.procs failed: " << cgroup_path <<std::endl; - } - - return pids; -} -int KUNPENG_PMU::PerfBpfCounter::ReadBpf(std::vector<PmuData> &data) +int KUNPENG_PMU::PerfBpfCounter::ReadBpfProcess(std::vector<PmuData> &data) { const unsigned cpuNums = MAX_CPU_NUM; auto obj = counterMap[this->evt->name]; @@ -124,27 +100,10 @@ int KUNPENG_PMU::PerfBpfCounter::ReadBpf(std::vector<PmuData> &data) // read the pmu count of this pid in each cpu core struct bpf_perf_event_value values[cpuNums]; - int pid = this->pid; - int err; - - //cgroup support - string cgroupPath = "/sys/fs/cgroup/"; - if (!this->evt->cgroupName.empty()) { - string cgroupPath = GetCgroupPath(this->evt->cgroupName); - auto pids = GetCgroupPids(cgroupPath); - for (pid_t pid : pids) { - err = bpf_map__lookup_elem(obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); - if(err) { - printf("failed to loopup elem. err: %s pid %d\n", strerror(-err), pid); - return SUCCESS; - } - } - } else { - err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); - if(err) { - printf("failed to loopup elem. err: %s pid %d\n", strerror(-err), this->pid); - return SUCCESS; - } + int err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if(err) { + printf("failed to lookup elem. 
err: %s pid %d\n", strerror(-err), this->pid); + return SUCCESS; } // convert pmu count to PmuData @@ -153,6 +112,7 @@ int KUNPENG_PMU::PerfBpfCounter::ReadBpf(std::vector &data) if(findProc != procMap.end()){ processId = findProc->second->pid; } + for(int i = 0; i < cpuNums; i++){ data.emplace_back(PmuData{0}); auto ¤t = data.back(); @@ -178,7 +138,7 @@ int KUNPENG_PMU::PerfBpfCounter::Read(EventData &eventData) if (!evt->cgroupName.empty()) { return ReadBpfCgroup(eventData.data); } else { - return ReadBpf(eventData.data); + return ReadBpfProcess(eventData.data); } } @@ -229,10 +189,12 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } - - int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent() { + if (this->pid == -1) { + return SUCCESS; + } + if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) { return SUCCESS; } @@ -269,7 +231,6 @@ int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent() int KUNPENG_PMU::PerfBpfCounter::InitBpfObj() { int err; - int map_fd; struct sched_counter_bpf *obj; auto findObj = counterMap.find(evt->name); if(findObj == counterMap.end()){ @@ -335,6 +296,24 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfObj() return 0; } +static int ReadCgroupId(const string &cgroupName) +{ + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + string fullCgroupPath = GetCgroupPath(cgroupName); + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { + return -1; + } + + return handle.cgroup_id; +} + int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj() { int err; @@ -425,9 +404,11 @@ int KUNPENG_PMU::PerfBpfCounter::Init(const bool groupEnable, const int groupFd, InitPidForEvent(); auto findCpuMap = evtCpuMap.find(this->evt->name); auto findCgroupMap = evtCgroupMap.find(this->evt->name); - if(findCpuMap != evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end() && findCgroupMap != evtCgroupMap.end() && findCgroupMap->second.find(this->evt->cgroupName) != findCgroupMap->second.end()) { + if(findCpuMap != evtCpuMap.end() && findCpuMap->second.count(this->cpu) && findCgroupMap != evtCgroupMap.end() + && findCgroupMap->second.count(this->evt->cgroupName)) { return SUCCESS; - } else if(findCgroupMap == evtCgroupMap.end()) { + } + if(findCgroupMap == evtCgroupMap.end()) { evtCgroupMap[this->evt->name].insert(this->evt->cgroupName); } int err = this->MapPerfAttr(groupEnable, groupFd); @@ -444,6 +425,9 @@ int KUNPENG_PMU::PerfBpfCounter::Init(const bool groupEnable, const int groupFd, int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) { + if (this->cpu == -1) { + return SUCCESS; + } struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(struct perf_event_attr); @@ -463,8 +447,7 @@ int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int g attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); - this->groupFd = groupFd; - printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { 
return MapErrno(errno); @@ -474,49 +457,27 @@ int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int g int KUNPENG_PMU::PerfBpfCounter::Enable() { - if (groupFd != -1) { - return SUCCESS; - } int err = PerfEvt::Enable(); if (err != SUCCESS) { return err; } - this->accumCount.clear(); - this->enabled = 0; - this->running = 0; return SUCCESS; } int KUNPENG_PMU::PerfBpfCounter::Disable() { - if (groupFd != -1) { - return SUCCESS; - } return PerfEvt::Disable(); } int KUNPENG_PMU::PerfBpfCounter::Reset() { - if (groupFd != -1) { - return SUCCESS; - } return PerfEvt::Reset(); } -int KUNPENG_PMU::PerfBpfCounter::ReadCgroupId(const string &cgroupName) +int KUNPENG_PMU::PerfBpfCounter::Close() { - char path[PATH_MAX + 1]; - char mnt[PATH_MAX + 1]; - struct { - struct file_handle fh; - uint64_t cgroup_id; - } handle; - int mount_id; - string fullCgroupPath = GetCgroupPath(cgroupName); - handle.fh.handle_bytes = sizeof(handle.cgroup_id); - if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { - return -1; + if (this->fd > 0) { + close(this->fd); } - - return handle.cgroup_id; + return SUCCESS; } \ No newline at end of file diff --git a/pmu/bpf/perf_counter_bpf.h b/pmu/bpf/perf_counter_bpf.h index ce1302c..74f1a34 100644 --- a/pmu/bpf/perf_counter_bpf.h +++ b/pmu/bpf/perf_counter_bpf.h @@ -37,34 +37,17 @@ namespace KUNPENG_PMU { int Enable() override; int Disable() override; int Reset() override; + int Close() override; int BeginRead(); int EndRead(); + private: - enum class GroupStatus - { - NO_GROUP, - GROUP_LEADER, - GROUP_MEMBER - }; - int CountValueToData(const __u64 value, const __u64 timeEnabled, - const __u64 timeRunning, __u64 &accumCount, std::vector &data); int InitBpfObj(); int InitBpfCgroupObj(); - int ReadBpf(std::vector &data); - int ReadBpfCgroup(std::vector &data); int InitPidForEvent(); - - // Accumulated pmu count, time enabled and time running. - __u64 count = 0; - __u64 enabled = 0; - __u64 running = 0; - // For group events, is the accum counts of all members. - // For normal events, has only one element. - std::vector<__u64> accumCount; - int groupFd = 0; - GroupStatus groupStatus = GroupStatus::NO_GROUP; - int ReadCgroupId(const std::string &cgroupName); + int ReadBpfProcess(std::vector &data); + int ReadBpfCgroup(std::vector &data); }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/bpf/sched_cgroup.bpf.c b/pmu/bpf/sched_cgroup.bpf.c index 5d9f705..d044db8 100644 --- a/pmu/bpf/sched_cgroup.bpf.c +++ b/pmu/bpf/sched_cgroup.bpf.c @@ -12,17 +12,14 @@ * Create: 2025-08-10 * Description: the bpf program for cgroup collecting in counting mode ******************************************************************************/ -#include "bpf/vmlinux.h" -#include "bpf/bpf_core_read.h" -#include "bpf/bpf_tracing.h" -#include "bpf/bpf_helpers.h" +#include +#include +#include +#include #define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary #define MAX_EVENTS 128 // max events per cgroup: arbitrary -// NOTE: many of map and global data will be modified before loading -// from the userspace (perf tool) using the skeleton helpers. 
- // single set of global perf events to measure // {evt0, cpu0}, {evt0, cpu1}, {evt0, cpu2}...{evt0, cpuM}, {evt1, cpu0}...{evtM, cpuM} struct { @@ -155,8 +152,8 @@ static int bperf_cgroup_count(void) for (c = 0; c < MAX_LEVELS; c++) { if (c == cgrp_cnt) break; - cgrp = cgrp_idx[c]; + cgrp = cgrp_idx[c]; + // aggregate the result by cgroup key = cgrp * num_events + idx; bpf_printk("c: %d cgrp: %d key: %d", c, cgrp, key); diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c index 4e4062b..891d813 100644 --- a/pmu/bpf/sched_counter.bpf.c +++ b/pmu/bpf/sched_counter.bpf.c @@ -12,10 +12,10 @@ * Create: 2025-08-10 * Description: the bpf program for ordinary or multi-thread program collecting in counting mode ******************************************************************************/ -#include "bpf/vmlinux.h" -#include "bpf/bpf_core_read.h" -#include "bpf/bpf_tracing.h" -#include "bpf/bpf_helpers.h" +#include <bpf/vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> char LICENSE[] SEC("license") = "GPL"; diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index b57a6ed..fe97363 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -34,6 +34,7 @@ namespace KUNPENG_PMU { virtual int Enable() = 0; virtual int Disable() = 0; virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/perf_counter_default.cpp b/pmu/perf_counter_default.cpp index e30fef0..25f2bb6 100644 --- a/pmu/perf_counter_default.cpp +++ b/pmu/perf_counter_default.cpp @@ -338,7 +338,7 @@ int KUNPENG_PMU::PerfDefaultCounter::MapPerfAttr(const bool groupEnable, const i groupStatus = GroupStatus::NO_GROUP; } this->groupFd = groupFd; - printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { return MapErrno(errno); -- Gitee From e6ba455c9e2addb34c2b6466bf74295f1400ed55 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 12 Aug 2025 15:18:52 +0800 Subject: [PATCH 5/6] Add exit analysis interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/evt.h | 2 -- pmu/evt_list.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pmu/evt.h b/pmu/evt.h index ce9c952..a268d00 100644 --- a/pmu/evt.h +++ b/pmu/evt.h @@ -89,8 +89,6 @@ public: return ""; } - std::string cgroupName; - protected: int fd; int cpu; diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 349ffa2..bc05bdf 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -37,6 +37,7 @@ public: virtual int Start() = 0; virtual int Stop() = 0; virtual int Read(EventData &eventData) = 0; + }; } // namespace KUNPENG_PMU -- Gitee From edaf1cfc519eb6b3ed2debc5ef0a68c75b506dae Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 12 Aug 2025 21:00:33 +0800 Subject: [PATCH 6/6] revise fd check --- pmu/bpf/evt_list_bpf.h | 6 +++--- pmu/evt_list.h | 4 +++- pmu/evt_list_default.h | 6 +++--- pmu/pmu_list.cpp | 9 ++++++--- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h index 2962892..b0d106f 100644 --- a/pmu/bpf/evt_list_bpf.h +++ b/pmu/bpf/evt_list_bpf.h @@ -51,9 +51,9 @@ public: int Start() override; int Stop() override; int Read(EventData &eventData) override; - int Enable(); - int Reset(); - int Close(); + 
int Enable() override; + int Reset() override; + int Close() override; void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, std::vector<PmuData>& pmuData); diff --git a/pmu/evt_list.h b/pmu/evt_list.h index bc05bdf..84b4c4f 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -37,7 +37,9 @@ public: virtual int Start() = 0; virtual int Stop() = 0; virtual int Read(EventData &eventData) = 0; - + virtual int Enable() = 0; + virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU diff --git a/pmu/evt_list_default.h b/pmu/evt_list_default.h index 446af55..f1fc946 100644 --- a/pmu/evt_list_default.h +++ b/pmu/evt_list_default.h @@ -65,11 +65,11 @@ public: } int Init(const bool groupEnable, const std::shared_ptr evtLeader); int Pause(); - int Close(); + int Close() override; int Start() override; - int Enable(); + int Enable() override; int Stop() override; - int Reset(); + int Reset() override; int Read(EventData &eventData) override; void SetGroupInfo(const EventGroupInfo &grpInfo); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 4ab72bb..5c30c3f 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -113,9 +113,12 @@ namespace KUNPENG_PMU { return symbolErrNo; } - auto err = CheckRlimit(fdNum); - if (err != SUCCESS) { - return err; + int err; + if (!taskParam->pmuEvt->useBpf) { // in bpf mode, cpuSize * procSize will exceed the rlimit + err = CheckRlimit(fdNum); + if (err != SUCCESS) { + return err; + } } err = Init(pd); -- Gitee
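
A few notes on the mechanisms this series relies on, with illustrative sketches; none of the code below is taken from the patches. The TriggeredRead helper ("enforce the bpf trace function") follows the bperf pattern from perf: before reading results, the raw_tp program is run once on each CPU so the per-cpu prev/accum readings are refreshed even if no sched_switch fired recently. A minimal sketch, assuming a libbpf version that provides bpf_prog_test_run_opts and a program fd taken from the generated skeleton:

    #include <bpf/bpf.h>        // bpf_prog_test_run_opts
    #include <linux/bpf.h>      // BPF_F_TEST_RUN_ON_CPU

    // Run the counting program once on `cpu` so it snapshots the counters
    // immediately, instead of waiting for the next real sched_switch.
    static int TriggerReadOnCpu(int progFd, int cpu)
    {
        struct bpf_prog_test_run_opts opts = {};
        opts.sz = sizeof(opts);              // libbpf opts structs carry their size
        opts.flags = BPF_F_TEST_RUN_ON_CPU;  // pin the test run to one CPU
        opts.cpu = cpu;
        return bpf_prog_test_run_opts(progFd, &opts);
    }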
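Registering a pid with the counting programs is two map updates, as InitPidForEvent does: zero the per-cpu accumulator under the pid's key, then record pid -> accum key in the filter map, so child threads that share the parent's key aggregate into the same profile. A hedged sketch against the sched_counter skeleton; the skeleton header name and MAX_CPU_NUM value are assumptions that must match the built bpf object:

    #include <bpf/libbpf.h>
    #include <linux/bpf.h>              // bpf_perf_event_value, BPF_NOEXIST
    #include <cstring>
    #include "sched_counter.skel.h"     // bpftool gen skeleton output (assumed name)

    #define MAX_CPU_NUM 1024            // illustrative; must cover all possible CPUs

    static int RegisterPid(struct sched_counter_bpf *obj, __u32 pid)
    {
        // Per-cpu accumulator starts at zero for this pid.
        static struct bpf_perf_event_value zeroed[MAX_CPU_NUM];
        std::memset(zeroed, 0, sizeof(zeroed));
        int err = bpf_map__update_elem(obj->maps.accum_readings, &pid, sizeof(pid),
                                       zeroed, sizeof(zeroed), BPF_NOEXIST);
        if (err) {
            return err;
        }
        // filter maps a scheduled-in tid to its accumulation key;
        // here the pid simply accumulates under itself.
        return bpf_map__update_elem(obj->maps.filter, &pid, sizeof(pid),
                                    &pid, sizeof(pid), BPF_NOEXIST);
    }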
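On the cgroup side, each opened counter fd is published in the shared events array at evtIdx * MAX_CPU_NUM + cpu, which is exactly the layout the bpf program reads back with key = idx * num_cpus + cpu. A sketch of that wiring with a plain perf_event_open syscall; the event type/config are placeholders, not the series' event mapping:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>
    #include <bpf/libbpf.h>
    #include <linux/bpf.h>

    #define MAX_CPU_NUM 1024   // illustrative; must match num_cpus in the bpf object

    // Open a counter on one cpu (pid = -1: every task on that cpu) and store
    // its fd under the (event index, cpu) slot of the perf-event array map.
    static int WireCounter(struct bpf_map *events, int evtIdx, int cpu)
    {
        struct perf_event_attr attr;
        std::memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;          // placeholder event
        attr.config = PERF_COUNT_HW_CPU_CYCLES;  // placeholder config
        attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;

        int fd = static_cast<int>(syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0));
        if (fd < 0) {
            return -1;
        }
        __u32 key = evtIdx * MAX_CPU_NUM + cpu;  // same layout the bpf side indexes
        return bpf_map__update_elem(events, &key, sizeof(key), &fd, sizeof(fd), BPF_ANY);
    }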
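ReadCgroupId resolves a cgroup directory to the kernel cgroup id through name_to_handle_at; that id is what the bpf side compares against the BPF_CORE_READ ancestor ids. One caveat worth flagging: the id is 64-bit, so returning it as int (as the moved function still does) can truncate large ids. A sketch with a 64-bit return, path handling simplified:

    #include <fcntl.h>      // name_to_handle_at, AT_FDCWD (g++ defines _GNU_SOURCE)
    #include <cstdint>
    #include <cstring>
    #include <string>

    // Map a cgroup directory to its kernel cgroup id (0 on failure).
    static uint64_t CgroupIdOf(const std::string &cgroupDir)
    {
        struct {
            struct file_handle fh;
            uint64_t cgroup_id;   // lands in the handle's flexible payload (GCC extension)
        } handle;
        std::memset(&handle, 0, sizeof(handle));
        int mountId = 0;

        handle.fh.handle_bytes = sizeof(handle.cgroup_id);
        if (name_to_handle_at(AT_FDCWD, cgroupDir.c_str(), &handle.fh, &mountId, 0) < 0) {
            return 0;
        }
        return handle.cgroup_id;
    }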
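When converting bpf_perf_event_value into per-cpu PmuData, note that running / enabled on the __u64 fields (as the countPercent assignment in patch 2 does) is integer division and collapses to 0 or 1. The conventional perf multiplexing correction, done in floating point, looks like this sketch:

    #include <cstdint>
    #include <linux/bpf.h>   // struct bpf_perf_event_value { counter, enabled, running }

    // Extrapolate the raw count over the full enabled window and report the
    // fraction of that window the event was actually scheduled on hardware.
    static uint64_t ScaledCount(const struct bpf_perf_event_value &v, double *runRatio)
    {
        if (v.running == 0 || v.enabled == 0) {
            if (runRatio) *runRatio = 0.0;
            return 0;
        }
        double ratio = static_cast<double>(v.running) / static_cast<double>(v.enabled);
        if (runRatio) *runRatio = ratio;
        return static_cast<uint64_t>(static_cast<double>(v.counter) / ratio);
    }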
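Finally, patches 3 and 6 make the bpf path opt-in per session rather than a build-time global: PmuAttr.useBpf selects EvtBpfList when the library was compiled with BPF_ENABLED, and the rlimit check is skipped in that mode because one fd per (event, cpu) replaces one per (cpu, pid). A caller-side sketch; pidList/numPid/numEvt/useBpf appear in the series, while evtList is an assumed companion field of include/pmu.h:

    #include "pmu.h"    // struct PmuAttr from include/pmu.h

    // Build a counting-mode attr that requests the bpf path.
    static PmuAttr MakeBpfCountingAttr(int *pids, unsigned numPid,
                                       char **evts, unsigned numEvt)
    {
        PmuAttr attr = {};
        attr.pidList = pids;
        attr.numPid = numPid;
        attr.evtList = evts;     // assumed field; the series shows numEvt alongside it
        attr.numEvt = numEvt;
        attr.useBpf = 1;         // bit added in patch 3; honored only when the
                                 // library itself was built with BPF_ENABLED
        return attr;
    }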