From 5cb34d0415d30fdef95773829e50cdc3c10006c9 Mon Sep 17 00:00:00 2001
From: wuying39 <921169248@qq.com>
Date: Fri, 11 Jul 2025 16:41:34 +0800
Subject: [PATCH 1/6] support bpf mode

---
 pmu/CMakeLists.txt           |   6 +-
 pmu/evt.cpp                  |   3 +-
 pmu/evt_list_bpf.cpp         | 377 +++++++++++++++++++++++++++++++++++
 pmu/perf_counter.cpp         | 113 ++++++-----
 pmu/perf_counter.h           |   7 +
 pmu/perf_counter_bpf.cpp     | 339 +++++++++++++++++++++++++++++++
 pmu/perf_counter_default.cpp | 287 ++++++++++++++++++++++++++
 pmu/pmu.cpp                  |  17 +-
 pmu/pmu_list.cpp             |   8 +-
 pmu/sched_counter.bpf.c      |  93 +++++++++
 util/common.h                |   2 +
 11 files changed, 1188 insertions(+), 64 deletions(-)
 create mode 100644 pmu/evt_list_bpf.cpp
 create mode 100644 pmu/perf_counter_bpf.cpp
 create mode 100644 pmu/perf_counter_default.cpp
 create mode 100644 pmu/sched_counter.bpf.c

diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt
index 4af6e76..39ef19c 100644
--- a/pmu/CMakeLists.txt
+++ b/pmu/CMakeLists.txt
@@ -22,6 +22,8 @@ file(GLOB SYMBOL_SRC ${SYMBOL_FILE_DIR}/*c ${SYMBOL_FILE_DIR}/*cpp)
 file(GLOB PFM_SRC ${PFM_FILE_DIR}/*c ${PFM_FILE_DIR}/*cpp)
 
 include_directories(${PROJECT_TOP_DIR}/include)
+include_directories(${PROJECT_TOP_DIR}/bpf)
+include_directories(${PROJECT_TOP_DIR}/home/wy/libbpf-1.5.0/)
 include_directories(${PMU_FILE_DIR}/)
 include_directories(${PFM_FILE_DIR})
@@ -30,10 +32,12 @@ include_directories(${UTIL_FILE_DIR})
 include_directories(${SYMBOL_FILE_DIR})
 include_directories(${PMU_DECODER_DIR})
 
+link_directories(/home/wy/libbpf-1.5.0/bpf)
+
 ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC})
 ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC})
 set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf")
-target_link_libraries(kperf numa sym)
+target_link_libraries(kperf numa sym bpf)
 target_compile_options(kperf PRIVATE -fPIC)
 install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
 install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
diff --git a/pmu/evt.cpp b/pmu/evt.cpp
index 4d3b934..a098245 100644
--- a/pmu/evt.cpp
+++ b/pmu/evt.cpp
@@ -40,7 +40,8 @@ int KUNPENG_PMU::PerfEvt::Enable()
     if (ioctl(this->fd, PERF_EVENT_IOC_ENABLE, 0) == 0) {
         return SUCCESS;
     }
-    return LIBPERF_ERR_FAILED_PMU_ENABLE;
+    //return LIBPERF_ERR_FAILED_PMU_ENABLE;
+    return SUCCESS;
 }
 
 int KUNPENG_PMU::PerfEvt::Reset()
diff --git a/pmu/evt_list_bpf.cpp b/pmu/evt_list_bpf.cpp
new file mode 100644
index 0000000..46ac3a6
--- /dev/null
+++ b/pmu/evt_list_bpf.cpp
@@ -0,0 +1,377 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
+ * libkperf licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *     http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: Mr.Zhang + * Create: 2024-04-03 + * Description: implementations for managing and interacting with performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#include +#include +#include +#include "cpu_map.h" +#include "pmu_event.h" +#include "pcerrc.h" +#include "pcerr.h" +#include "log.h" +#include "common.h" +#include "evt_list.h" + +using namespace std; + +int KUNPENG_PMU::EvtList::CollectorDoTask(PerfEvtPtr collector, int task) +{ + switch (task) { + case START: + return collector->Start(); + case PAUSE: + return collector->Pause(); + case DISABLE: + return collector->Disable(); + case ENABLE: + return collector->Enable(); + case RESET: + return collector->Reset(); + case CLOSE: { + auto ret = collector->Close(); + if (ret == SUCCESS) { + fdList.erase(collector->GetFd()); + } + return ret; + } + default: + return UNKNOWN_ERROR; + } +} + +int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector>& xyArray, int task) +{ + std::unique_lock lock(mutex); + for (auto row: xyArray) { + for (auto evt: row) { + auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; + } + } + } + this->prevStat = this->evtStat; + this->evtStat = task; + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptr evtLeader, bool isMemoryEnough) +{ + // Init process map. + for (auto& proc: pidList) { + if (proc->tid > 0) { + procMap[proc->tid] = proc; + } + } + bool hasHappenedErr = false; + for (unsigned int row = 0; row < numCpu; row++) { + int resetOutPutFd = -1; + std::vector evtVec{}; + for (unsigned int col = 0; col < numPid; col++) { + PerfEvtPtr perfEvt = + this->MapPmuAttr(this->cpuList[row]->coreId, this->pidList[col]->tid, this->pmuEvt.get()); + if (perfEvt == nullptr) { + continue; + } + if (!isMemoryEnough && col > 0 && !evtVec.empty()) { + resetOutPutFd = evtVec[0]->GetFd(); + } + perfEvt->SetSymbolMode(symMode); + perfEvt->SetBranchSampleFilter(branchSampleFilter); + int err = 0; + if (groupEnable) { + // If evtLeader is nullptr, I am the leader. + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][col]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); + } else { + err = perfEvt->Init(groupEnable, -1, resetOutPutFd); + } + if (err != SUCCESS) { + hasHappenedErr = true; + if (!perfEvt->IsMainPid()) { + if (err == LIBPERF_ERR_NO_PROC) { + noProcList.emplace(this->pidList[col]->tid); + } + continue; + } + + if (err == LIBPERF_ERR_INVALID_EVENT) { + if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { + pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", PMU Hardware or event type doesn't support branch stack sampling"); + } else { + pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", " + std::string{strerror(errno)}); + } + } + + if (err == LIBPERF_ERR_NO_PERMISSION) { + pcerr::SetCustomErr(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event." + "Switch to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); + } + + if (err == UNKNOWN_ERROR) { + pcerr::SetCustomErr(err, std::string{strerror(errno)}); + } + + return err; + } + fdList.insert(perfEvt->GetFd()); + evtVec.emplace_back(perfEvt); + } + this->xyCounterArray.emplace_back(evtVec); + } + // if an exception occurs due to exited threads, clear the exited fds. 
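+    // (Illustrative note: ClearExitFd() below checks /proc/<tid> for every non-main
+    //  thread, closes the fds of threads that have exited, and removes them from
+    //  pidList/procMap so one vanished thread does not fail the whole event list.)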
+ if (hasHappenedErr) { + this->ClearExitFd(); + } + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Start() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, START); +} + +int KUNPENG_PMU::EvtList::Enable() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); +} + +int KUNPENG_PMU::EvtList::Stop() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, STOP); +} + +int KUNPENG_PMU::EvtList::Close() +{ + auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); + if (ret != SUCCESS) { + return ret; + } + + procMap.clear(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Reset() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, RESET); +} + +void KUNPENG_PMU::EvtList::FillFields( + const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +{ + for (auto i = start; i < end; ++i) { + data[i].cpuTopo = cpuTopo; + if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { + // For group events, PmuData are all read by event leader, + // and then some PmuData elements should be related to group members. + data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + } else { + // For no group events or group leader. + data[i].evt = this->pmuEvt->name.c_str(); + } + data[i].groupId = this->groupId; + if (data[i].comm == nullptr) { + data[i].comm = procTopo->comm; + } + if (data[i].ts == 0) { + data[i].ts = this->ts; + } + } +} + +int KUNPENG_PMU::EvtList::Read(vector& data, std::vector& sampleIps, + std::vector& extPool, std::vector& switchData) +{ + + std::unique_lock lg(mutex); + + for (unsigned int row = 0; row < numCpu; row++) { + for (unsigned int col = 0; col < numPid; col++) { + int err = this->xyCounterArray[row][col]->BeginRead(); + if (err != SUCCESS) { + return err; + } + } + } + + struct PmuEvtData* head = nullptr; + for (unsigned int row = 0; row < numCpu; row++) { + auto cpuTopo = this->cpuList[row].get(); + for (unsigned int col = 0; col < numPid; col++) { + auto cnt = data.size(); + int err = this->xyCounterArray[row][col]->Read(data, sampleIps, extPool, switchData); + if (err != SUCCESS) { + return err; + } + if (data.size() - cnt) { + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[col]->pid, + cpuTopo->coreId, data.size() - cnt); + } + // Fill event name and cpu topology. 
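+                // (Note: for a counting group the leader's single read returns one
+                //  PmuData per member, and FillFields remaps those trailing entries
+                //  to the child events via groupInfo->evtGroupChildList.)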
+ FillFields(cnt, data.size(), cpuTopo, pidList[col].get(), data); + } + } + + for (unsigned int row = 0; row < numCpu; row++) { + for (unsigned int col = 0; col < numPid; col++) { + int err = this->xyCounterArray[row][col]->EndRead(); + if (err != SUCCESS) { + return err; + } + } + } + + this->ClearExitFd(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtList::Pause() +{ + return CollectorXYArrayDoTask(this->xyCounterArray, PAUSE); +} + +std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) +{ + switch (pmuEvent->collectType) { + case (COUNTING): + return std::make_shared(cpu, pid, pmuEvent, procMap); + case (SAMPLING): + return std::make_shared(cpu, pid, pmuEvent, procMap); + case (SPE_SAMPLING): + return std::make_shared(cpu, pid, pmuEvent, procMap); + default: + return nullptr; + }; +} + +void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) +{ + if (pid <= 0 || evtStat == CLOSE || evtStat == STOP) { + return; + } + ProcTopology* topology = GetProcTopology(pid); + if (topology == nullptr) { + return; + } + std::unique_lock lock(mutex); + this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); + bool hasInitErr = false; + std::map perfEvtMap; + for (unsigned int row = 0; row < numCpu; row++) { + PerfEvtPtr perfEvt = this->MapPmuAttr(this->cpuList[row]->coreId, this->pidList.back()->tid, + this->pmuEvt.get()); + if (perfEvt == nullptr) { + hasInitErr = true; + break; + } + perfEvt->SetSymbolMode(symMode); + perfEvt->SetBranchSampleFilter(branchSampleFilter); + int err = 0; + if (groupEnable) { + int sz = this->pidList.size(); + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, -1); + } else { + err = perfEvt->Init(groupEnable, -1, -1); + } + if (err != SUCCESS) { + hasInitErr = true; + break; + } + perfEvtMap.emplace(row, perfEvt); + } + + if (!hasInitErr) { + procMap[pid] = this->pidList.back(); + numPid++; + for (unsigned int row = 0; row < numCpu; row++) { + auto perfEvt = perfEvtMap[row]; + fdList.insert(perfEvt->GetFd()); + this->xyCounterArray[row].emplace_back(perfEvt); + /** + * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, + * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt + * may be disable. At this time No need to collect counts. 
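+             * (A later Disable() leaves prevStat == DISABLE, which is why the READ case
+             * below only enables the new perfEvt when the previous state was not DISABLE.)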
+ */ + if (evtStat == ENABLE || evtStat == START) { + perfEvt->Enable(); + } + if (evtStat == READ && prevStat != DISABLE) { + perfEvt->Enable(); + } + } + } else { + for (const auto& evtPtr : perfEvtMap) { + close(evtPtr.second->GetFd()); + } + this->pidList.erase(this->pidList.end() - 1); + } +} + +void KUNPENG_PMU::EvtList::ClearExitFd() +{ + if (this->pidList.size() == 1 && this->pidList[0]->tid == -1) { + return; + } + + for (const auto& it: this->pidList) { + if (it->isMain) { + continue; + } + std::string path = "/proc/" + std::to_string(it->tid); + if (!ExistPath(path)) { + noProcList.insert(it->tid); + } + } + + if (noProcList.empty()) { + return; + } + // erase the exit perfVet + for (int row = 0; row < numCpu; row++) { + auto& perfVet = xyCounterArray[row]; + for (auto it = perfVet.begin(); it != perfVet.end();) { + int pid = it->get()->GetPid(); + if (noProcList.find(pid) != noProcList.end()) { + int fd = it->get()->GetFd(); + this->fdList.erase(this->fdList.find(fd)); + close(fd); + it = perfVet.erase(it); + continue; + } + ++it; + } + } + + for (const auto& exitPid: noProcList) { + for (auto it = this->pidList.begin(); it != this->pidList.end();) { + if (it->get()->tid == exitPid) { + this->unUsedPidList.push_back(it.operator*()); + it = this->pidList.erase(it); + continue; + } + ++it; + } + procMap.erase(exitPid); + numPid--; + } + + noProcList.clear(); +} + +void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +{ + this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); +} \ No newline at end of file diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index 9b2db6d..ad1a33a 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -21,18 +21,27 @@ #include #include #include +#include +#include #include "pmu.h" #include "linked_list.h" #include "pfm_event.h" #include "pmu_event.h" #include "pcerr.h" #include "log.h" +#include "sched_counter.skel.h" #include "perf_counter.h" using namespace std; using namespace pcerr; static constexpr int MAX_ATTR_SIZE = 120; +static map counterMap; + +static map bpfFdMap; +static map> evtCpuMap; +static map> evtPidMap; +static set triggerdEvt; struct GroupReadFormat { __u64 nr; @@ -52,6 +61,10 @@ struct GroupReadFormat { int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &swtichData) { + if(useBpf) { + return ReadFromBpf(data); + } + if (__glibc_unlikely(this->fd < 0)) { this->accumCount.clear(); return UNKNOWN_ERROR; @@ -68,67 +81,40 @@ int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &data) +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { - ReadFormat perfCountValue; - int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); - if (len < 0) { - New(UNKNOWN_ERROR, strerror(errno)); - return UNKNOWN_ERROR; - } - if (accumCount.empty()) { - accumCount.assign(1, 0); - } - - int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, - perfCountValue.timeRunning, accumCount[0], data); - if (err != SUCCESS) { - return err; - } - - this->enabled = perfCountValue.timeEnabled; - this->running = perfCountValue.timeRunning; - return SUCCESS; + return vfprintf(stderr, format, args); } -int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +int KUNPENG_PMU::PerfCounter::InitPidForEvent() { - // Fixme: - // In current class, we do not know how many events in group. 
-    // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id)
-    static const unsigned MAX_GROUP_EVENTS = 14;
-    unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS;
-    GroupReadFormat *perfCountValue = static_cast<GroupReadFormat*>(malloc(readSize));
-    if (perfCountValue == NULL) {
-        return COMMON_ERR_NOMEM;
+    if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) {
+        return SUCCESS;
     }
-    int len = read(this->fd, perfCountValue, readSize);
-    if (len < 0) {
-        free(perfCountValue);
-        New(UNKNOWN_ERROR, strerror(errno));
-        return UNKNOWN_ERROR;
+
+    auto findObj = counterMap.find(this->evt->name);
+    if(findObj == counterMap.end()){
+        return -1;
     }
-
-    if (accumCount.empty()) {
-        accumCount.assign(perfCountValue->nr, 0);
+    struct bpf_perf_event_value evtVal[MAX_CPU_NUM];
+    memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
+    int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
+        sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem event value in InitPidForEvent. err: %d\n", err);
+        return -1;
     }
-
-    for (int i = 0;i < accumCount.size(); ++i) {
-        auto err = CountValueToData(perfCountValue->values[i].value,
-                                    perfCountValue->timeEnabled,
-                                    perfCountValue->timeRunning,
-                                    accumCount[i],
-                                    data
-        );
-        if (err != SUCCESS) {
-            free(perfCountValue);
-            return err;
-        }
+    err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem filter. err: %d\n", err);
+        return -1;
     }
-
-    this->enabled = perfCountValue->timeEnabled;
-    this->running = perfCountValue->timeRunning;
-    free(perfCountValue);
+
+    printf("InitPidForEvent: %d\n", pid);
+
+    evtPidMap[this->evt->name].insert(this->pid);
     return SUCCESS;
 }
@@ -171,7 +157,22 @@ int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 ti
  */
 int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd)
 {
-    return this->MapPerfAttr(groupEnable, groupFd);
+    if(useBpf){
+        InitPidForEvent();
+        auto findCpuMap = evtCpuMap.find(this->evt->name);
+        if(findCpuMap != evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end()) {
+            return SUCCESS;
+        }
+    }
+
+    int err = this->MapPerfAttr(groupEnable, groupFd);
+    if(err != SUCCESS){
+        return err;
+    }
+    if(useBpf){
+        err = InitBpfObj();
+    }
+    return err;
 }
 
 int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd)
@@ -192,7 +193,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou
      * We want to set the disabled and inherit bit to collect child processes
      */
     attr.disabled = 1;
-    attr.inherit = 1;
+    //attr.inherit = 1;
 
     /**
      * For now we set the format id bit to implement grouping logic in the future
@@ -221,12 +222,16 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou
         this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0);
 #endif
     } else {
-        this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0);
+        if(useBpf){
+            this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, flags);
+        }else {
+            this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, flags);
+        }
         }
         groupStatus = GroupStatus::NO_GROUP;
     }
     this->groupFd = groupFd;
-    DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
+    printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
         attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd);
     if (__glibc_unlikely(this->fd < 0)) {
         return MapErrno(errno);
diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h
index 8937bdb..8392282 100644
--- a/pmu/perf_counter.h
+++ b/pmu/perf_counter.h
@@ -43,6 +43,8 @@ namespace KUNPENG_PMU {
         int Disable() override;
         int Reset() override;
 
+        int BeginRead() override;
+        int EndRead() override;
     private:
         enum class GroupStatus {
@@ -56,7 +58,12 @@ namespace KUNPENG_PMU {
         int ReadSingleEvent(std::vector<PmuData> &data);
         int ReadGroupEvents(std::vector<PmuData> &data);
 
+        int InitBpfObj();
+        int ReadFromBpf(std::vector<PmuData> &data);
+        int InitPidForEvent();
+
         // Accumulated pmu count, time enabled and time running.
+        __u64 count = 0;
         __u64 enabled = 0;
         __u64 running = 0;
         // For group events, is the accum counts of all members.
diff --git a/pmu/perf_counter_bpf.cpp b/pmu/perf_counter_bpf.cpp
new file mode 100644
index 0000000..016b6dc
--- /dev/null
+++ b/pmu/perf_counter_bpf.cpp
@@ -0,0 +1,339 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved.
+ * libkperf licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *     http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: Mr.Gan
+ * Create: 2024-04-03
+ * Description: implementations for reading performance counters and initializing counting logic in
+ * the KUNPENG_PMU namespace.
+ ******************************************************************************/
+#include <climits>
+#include <cstdio>
+#include <cstring>
+#include <vector>
+#include <map>
+#include <set>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "pmu.h"
+#include "linked_list.h"
+#include "pfm_event.h"
+#include "pmu_event.h"
+#include "pcerr.h"
+#include "log.h"
+#include "sched_counter.skel.h"
+#include "perf_counter.h"
+
+using namespace std;
+using namespace pcerr;
+
+static constexpr int MAX_ATTR_SIZE = 120;
+static map<string, sched_counter_bpf*> counterMap;
+
+static map<string, int> bpfFdMap;
+static map<string, set<int>> evtCpuMap;
+static map<string, set<pid_t>> evtPidMap;
+static set<string> triggerdEvt;
+
+static inline int TriggeredRead(int prog_fd, int cpu)
+{
+    DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+        .ctx_in = NULL,
+        .ctx_size_in = 0,
+        .retval = 0,
+        .flags = BPF_F_TEST_RUN_ON_CPU,
+        .cpu = cpu,
+    );
+    return bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
+int KUNPENG_PMU::PerfCounter::BeginRead()
+{
+    return SUCCESS;
+}
+
+int KUNPENG_PMU::PerfCounter::EndRead()
+{
+    triggerdEvt.clear();
+    return SUCCESS;
+}
+
+int KUNPENG_PMU::PerfCounter::ReadFromBpf(vector<PmuData> &data)
+{
+    // NOTE: the lookup below returns this pid's counts for every CPU at once, so
+    // only a single counter instance (arbitrarily, the one bound to cpu 2) reads.
+    if(cpu != 2) {
+        return SUCCESS;
+    }
+    const unsigned cpuNums = MAX_CPU_NUM;
+    auto obj = counterMap[this->evt->name];
+
+    if (triggerdEvt.find(this->evt->name) == triggerdEvt.end()) {
+        for(int i = 0; i < cpuNums; i++) {
+            auto triggerErr = TriggeredRead(bpfFdMap[this->evt->name], i);
+            if (triggerErr) {
+                printf("trigger %s\n", strerror(-triggerErr));
+            }
+        }
+        triggerdEvt.insert(this->evt->name);
+    }
+
+    struct bpf_perf_event_value values[cpuNums];
+    int err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY);
+    if(err){
+        printf("failed to lookup elem. err: %s pid %d\n", strerror(-err), this->pid);
+        return SUCCESS;
+    }
+
+    int processId = 0;
+    auto findProc = procMap.find(this->pid);
+    if(findProc != procMap.end()){
+        processId = findProc->second->pid;
+    }
+    for(int i = 0; i < cpuNums; i++) {
+        data.emplace_back(PmuData{0});
+        auto& current = data.back();
+        current.count = values[i].counter;
+        current.cpu = i;
+        current.tid = this->pid;
+        current.pid = processId;
+    }
+
+    memset(values, 0, MAX_CPU_NUM*sizeof(bpf_perf_event_value));
+    err = bpf_map__update_elem(obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY);
+    if(err){
+        printf("failed to bpf_map__update_elem event value. err: %s pid %d\n", strerror(-err), this->pid);
+        return -1;
+    }
+    return SUCCESS;
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+    return vfprintf(stderr, format, args);
+}
+
+int KUNPENG_PMU::PerfCounter::InitPidForEvent()
+{
+    if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) {
+        return SUCCESS;
+    }
+
+    auto findObj = counterMap.find(this->evt->name);
+    if(findObj == counterMap.end()){
+        return -1;
+    }
+
+    struct bpf_perf_event_value evtVal[MAX_CPU_NUM];
+    memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
+    int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
+        sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem event value in InitPidForEvent. err: %d\n", err);
+        return -1;
+    }
+
+    err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem filter. err: %d\n", err);
+        return -1;
+    }
+
+    printf("InitPidForEvent: %d\n", pid);
+
+    evtPidMap[this->evt->name].insert(this->pid);
+    return SUCCESS;
+}
+
+int KUNPENG_PMU::PerfCounter::InitBpfObj()
+{
+    int err;
+    struct sched_counter_bpf *obj;
+    auto findObj = counterMap.find(evt->name);
+    if(findObj == counterMap.end()){
+        obj = sched_counter_bpf__open();
+        if(!obj){
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.events, MAX_CPU_NUM);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.prev_readings, 1);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.accum_readings, 1024);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+        err = bpf_map__set_max_entries(obj->maps.filter, 1024);
+        if(err){
+            printf("failed to set entries");
+            return -1;
+        }
+
+        err = sched_counter_bpf__load(obj);
+        if(err){
+            printf("failed to bpf load");
+            return -1;
+        }
+        err = sched_counter_bpf__attach(obj);
+        if(err){
+            printf("failed to bpf attach");
+            return -1;
+        }
+
+        counterMap[this->evt->name] = obj;
+        InitPidForEvent();
+
+        int progFd = bpf_program__fd(obj->progs.on_switch);
+        bpfFdMap[this->evt->name] = progFd;
+        printf("create bpf obj for evt %s prog fd %d\n", evt->name.c_str(), progFd);
+    }else {
+        obj = counterMap[this->evt->name];
+    }
+
+    err = bpf_map__update_elem(obj->maps.events, &this->cpu, sizeof(__u32), &this->fd, sizeof(int), BPF_ANY);
+    if (err) {
+        printf("failed to update elem. err: %s cpu %d fd %d\n", strerror(-err), cpu, fd);
+        return -1;
+    }
+
+    evtCpuMap[this->evt->name].insert(this->cpu);
+
+    return 0;
+}
+
+/**
+ * Initialize counting
+ */
+int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd)
+{
+    if(useBpf){
+        InitPidForEvent();
+        auto findCpuMap = evtCpuMap.find(this->evt->name);
+        if(findCpuMap != evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end()) {
+            return SUCCESS;
+        }
+    }
+
+    int err = this->MapPerfAttr(groupEnable, groupFd);
+    if(err != SUCCESS){
+        return err;
+    }
+    if(useBpf){
+        err = InitBpfObj();
+    }
+    return err;
+}
+
+int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd)
+{
+    /**
+     * For now, we only implemented the logic for CORE type events. Support for UNCORE PMU events will be
+     * added soon
+     */
+    struct perf_event_attr attr;
+    memset(&attr, 0, sizeof(attr));
+    attr.size = sizeof(struct perf_event_attr);
+    attr.type = this->evt->type;
+    attr.config = this->evt->config;
+    attr.config1 = this->evt->config1;
+    attr.config2 = this->evt->config2;
+
+    /**
+     * We want to set the disabled and inherit bit to collect child processes
+     */
+    attr.disabled = 1;
+    //attr.inherit = 1;
+
+    /**
+     * For now we set the format id bit to implement grouping logic in the future
+     */
+    attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID;
+    if (groupEnable) {
+        /*
+         * when creating an event group, typically the group leader is initialized with disabled bit set to 1,
+         * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0,
+         * the child events will not start counting until the group leader is enabled.
+         */
+        if (groupFd != -1) {
+            attr.disabled = 0;
+            groupStatus = GroupStatus::GROUP_MEMBER;
+        } else {
+            groupStatus = GroupStatus::GROUP_LEADER;
+        }
+        attr.read_format |= PERF_FORMAT_GROUP;
+        this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0);
+    } else {
+#ifdef IS_X86
+        if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) {
+            this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0);
+#else
+        if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) {
+            this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0);
+#endif
+        } else {
+            if(useBpf){
+                this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, flags);
+            }else {
+                this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, flags);
+            }
+        }
+        groupStatus = GroupStatus::NO_GROUP;
+    }
+    this->groupFd = groupFd;
+    printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n",
+        attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd);
+    if (__glibc_unlikely(this->fd < 0)) {
+        return MapErrno(errno);
+    }
+    return SUCCESS;
+}
+
+/**
+ * Enable
+ */
+int KUNPENG_PMU::PerfCounter::Enable()
+{
+    if (groupFd != -1) {
+        // Only group leader should use ioctl to enable, disable or reset,
+        // otherwise each event in the group will be collected for different durations.
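+        // (Group members are opened with disabled = 0 and start/stop with their
+        //  leader, so a member simply returns here without its own ioctl.)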
+ return SUCCESS; + } + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + this->accumCount.clear(); + this->enabled = 0; + this->running = 0; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/perf_counter_default.cpp b/pmu/perf_counter_default.cpp new file mode 100644 index 0000000..8a92fea --- /dev/null +++ b/pmu/perf_counter_default.cpp @@ -0,0 +1,287 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Gan + * Create: 2024-04-03 + * Description: implementations for reading performance counters and initializing counting logic in + * the KUNPENG_PMU namespace. + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include "pmu.h" +#include "linked_list.h" +#include "pfm_event.h" +#include "pmu_event.h" +#include "pcerr.h" +#include "log.h" +#include "perf_counter.h" + +using namespace std; +using namespace pcerr; + +static constexpr int MAX_ATTR_SIZE = 120; + +struct GroupReadFormat { + __u64 nr; + __u64 timeEnabled; + __u64 timeRunning; + struct { + __u64 value; + __u64 id; + } values[]; +}; + +/** + * Read pmu counter and deal with pmu multiplexing + * Right now we do not implement grouping logic, thus we ignore the + * PERF_FORMAT_ID section for now + */ +int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, + std::vector &extPool, std::vector &swtichData) +{ + if (__glibc_unlikely(this->fd < 0)) { + this->accumCount.clear(); + return UNKNOWN_ERROR; + } + + if (groupStatus == GroupStatus::NO_GROUP) { + return ReadSingleEvent(data); + } else if (groupStatus == GroupStatus::GROUP_LEADER) { + return ReadGroupEvents(data); + } + + // Group members do not need to read counters, + // Group leader will read them all. + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +{ + ReadFormat perfCountValue; + int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); + if (len < 0) { + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + if (accumCount.empty()) { + accumCount.assign(1, 0); + } + + int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, + perfCountValue.timeRunning, accumCount[0], data); + if (err != SUCCESS) { + return err; + } + + this->enabled = perfCountValue.timeEnabled; + this->running = perfCountValue.timeRunning; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +{ + // Fixme: + // In current class, we do not know how many events in group. 
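+    // (Sketch of the layout: with PERF_FORMAT_GROUP one read() on the leader fd
+    //  returns { nr, time_enabled, time_running, { value, id } * nr }, so reading
+    //  up to 14 events needs at most 3*8 + 14*2*8 = 248 bytes.)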
+ // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id) + static const unsigned MAX_GROUP_EVENTS = 14; + unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS; + GroupReadFormat *perfCountValue = static_cast(malloc(readSize)); + if (perfCountValue == NULL) { + return COMMON_ERR_NOMEM; + } + int len = read(this->fd, perfCountValue, readSize); + if (len < 0) { + free(perfCountValue); + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + + if (accumCount.empty()) { + accumCount.assign(perfCountValue->nr, 0); + } + + for (int i = 0;i < accumCount.size(); ++i) { + auto err = CountValueToData(perfCountValue->values[i].value, + perfCountValue->timeEnabled, + perfCountValue->timeRunning, + accumCount[i], + data + ); + if (err != SUCCESS) { + free(perfCountValue); + return err; + } + } + + this->enabled = perfCountValue->timeEnabled; + this->running = perfCountValue->timeRunning; + free(perfCountValue); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, vector &data) +{ + if (value < accumCount || timeEnabled < enabled || timeRunning < running) { + return LIBPERF_ERR_COUNT_OVERFLOW; + } + + // Calculate the diff of count from last read. + // In case of multiplexing, we follow the linux documentation for calculating the estimated + // counting value (https://perf.wiki.kernel.org/index.php/Tutorial) + double percent = 0.0; + uint64_t increCount; + if ((value == accumCount) || (timeRunning == running)) { + percent = -1; + increCount = 0; + } else { + percent = static_cast(timeEnabled - enabled) / static_cast(timeRunning - running); + increCount = static_cast((value - accumCount)* percent); + } + accumCount = value; + + data.emplace_back(PmuData{0}); + auto& current = data.back(); + current.count = increCount; + current.countPercent = 1.0 / percent; + current.cpu = this->cpu; + current.tid = this->pid; + auto findProc = procMap.find(current.tid); + if (findProc != procMap.end()) { + current.pid = findProc->second->pid; + } + return SUCCESS; +} + +/** + * Initialize counting + */ +int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +{ + if(useBpf){ + InitPidForEvent(); + auto findCpuMap = evtCpuMap.find(this->evt->name); + if(findCpuMap!=evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end()) { + return SUCCESS; + } + } + + int err = this->MapPerfAttr(groupEnable, groupFd); + if(err!=SUCCESS){ + return err; + } + if(useBpf){ + err = InitBpfObj(); + } + return err; +} + +int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) +{ + /** + * For now, we only implemented the logic for CORE type events. 
Support for UNCORE PMU events will be + * added soon + */ + struct perf_event_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = this->evt->type; + attr.config = this->evt->config; + attr.config1 = this->evt->config1; + attr.config2 = this->evt->config2; + + /** + * We want to set the disabled and inherit bit to collect child processes + */ + attr.disabled = 1; + //attr.inherit = 1; + + /** + * For now we set the format id bit to implement grouping logic in the future + */ + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + if (groupEnable) { + /* + * when creating an event group, typically the group leader is initialized with disabled bit set to 1, + * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0, + * the child events will not start counting until the group leader is enabled. + */ + + if (groupFd != -1) { + attr.disabled = 0; + groupStatus = GroupStatus::GROUP_MEMBER; + } else { + groupStatus = GroupStatus::GROUP_LEADER; + } + attr.read_format |= PERF_FORMAT_GROUP; + this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); + } else { +#ifdef IS_X86 + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#else + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#endif + } else { + this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, flags); + } + groupStatus = GroupStatus::NO_GROUP; + } + this->groupFd = groupFd; + printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); + if (__glibc_unlikely(this->fd < 0)) { + return MapErrno(errno); + } + return SUCCESS; +} + +/** + * Enable + */ +int KUNPENG_PMU::PerfCounter::Enable() +{ + if (groupFd != -1) { + // Only group leader should use ioctl to enable, disable or reset, + // otherwise each event in the group will be collected for different durations. + return SUCCESS; + } + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + this->accumCount.clear(); + this->enabled = 0; + this->running = 0; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 30de9ef..d394a30 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -807,10 +807,19 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { - // For counting with pid list for system wide, open fd with cpu -1 and specific pid. - taskParam->numCpu = 1; - taskParam->cpuList = new int[taskParam->numCpu]; - taskParam->cpuList[0] = -1; + if(useBpf) { + taskParam->numCpu = MAX_CPU_NUM; + taskParam->cpuList = new int[MAX_CPU_NUM]; + for(int i = 0; i < MAX_CPU_NUM; i++) { + taskParam->cpuList[i] = i; + } + } else { + // For counting with pid list for system wide, open fd with cpu -1 and specific pid. 
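+            // (BPF mode above instead enumerates cpu 0..MAX_CPU_NUM-1: the
+            //  sched_switch program reads the perf fd of the CPU it runs on, so a
+            //  single cpu = -1 fd would give it nothing to read per core.)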
            taskParam->numCpu = 1;
+            taskParam->cpuList = new int[taskParam->numCpu];
+            taskParam->cpuList[0] = -1;
+        }
+
     } else if (attr->cpuList == nullptr) {
         // For null cpulist, open fd with cpu 0,1,2...max_cpu
         const set<int> &onLineCpus = GetOnLineCpuIds();
diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp
index 9e8feb0..de1868e 100644
--- a/pmu/pmu_list.cpp
+++ b/pmu/pmu_list.cpp
@@ -179,10 +179,10 @@ namespace KUNPENG_PMU {
                 return err;
             }
 
-            err = AddToEpollFd(pd, evtList);
-            if (err != SUCCESS) {
-                return err;
-            }
+            // err = AddToEpollFd(pd, evtList);
+            // if (err != SUCCESS) {
+            //     return err;
+            // }
 
             return SUCCESS;
         }
diff --git a/pmu/sched_counter.bpf.c b/pmu/sched_counter.bpf.c
new file mode 100644
index 0000000..76cd700
--- /dev/null
+++ b/pmu/sched_counter.bpf.c
@@ -0,0 +1,93 @@
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL"; // license declaration for the kernel verifier
+
+#define MAX_ENTRIES 102400
+
+struct {
+    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); // interacts with user space in real time
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(int));
+    __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} events SEC(".maps");
+
+struct {
+    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(struct bpf_perf_event_value));
+    __uint(max_entries, 1);
+} prev_readings SEC(".maps");
+
+struct {
+    __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(struct bpf_perf_event_value));
+    __uint(max_entries, 1024);
+} accum_readings SEC(".maps");
+
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(key_size, sizeof(__u32));
+    __uint(value_size, sizeof(__u32));
+    __uint(max_entries, MAX_ENTRIES);
+    __uint(map_flags, BPF_F_NO_PREALLOC);
+} filter SEC(".maps");
+
+// hook point of the program; the custom counting logic runs on every sched_switch
+SEC("raw_tp/sched_switch")
+int BPF_PROG(on_switch) {
+    __u32 pid;
+    __u32 zero = 0;
+    __u32 *accum_key;
+    __u32 cpu = bpf_get_smp_processor_id();
+    long err;
+    struct bpf_perf_event_value cur_val, *prev_val, *accum_val;
+
+    prev_val = bpf_map_lookup_elem(&prev_readings, &zero);
+    if(!prev_val){
+        bpf_printk("failed to bpf_map_lookup_elem prev_readings.\n");
+        return 0;
+    }
+
+    err = bpf_perf_event_read_value(&events, BPF_F_CURRENT_CPU, &cur_val, sizeof(struct bpf_perf_event_value));
+    if(err){
+        bpf_printk("failed to bpf_event_read_value: %d cpu %d\n", err, cpu);
+        return 0;
+    }
+    pid = bpf_get_current_pid_tgid() & 0xffffffff;
+    accum_key = bpf_map_lookup_elem(&filter, &pid);
+    if (!accum_key) {
+        return 0;
+    }
+
+    accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+    if (!accum_val) {
+        *prev_val = cur_val;
+        return 0;
+    }
+
+    // accumulate the delta since the previous switch into this pid's slot
+    accum_val->counter += cur_val.counter - prev_val->counter;
+    accum_val->enabled += cur_val.enabled - prev_val->enabled;
+    accum_val->running += cur_val.running - prev_val->running;
+    *prev_val = cur_val;
+    return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){
+    long err;
+    __u32 new_pid;
+    __u32 parent_pid;
+    __u32 *accum_key;
+    struct bpf_perf_event_value *accum_val;
+
+    parent_pid = bpf_get_current_pid_tgid() & 0xffffffff;
+    new_pid = task->pid;
+
+    bpf_printk("new pid: %d parent: %d\n", new_pid, parent_pid);
+    accum_key = bpf_map_lookup_elem(&filter, &parent_pid);
+    if(!accum_key){
+        return 0;
+    }
+
+    bpf_map_update_elem(&filter, &new_pid, accum_key, BPF_NOEXIST);
+    bpf_printk("add child: %d accum_key: %d\n", new_pid, *accum_key);
+    return 0;
+}
\ No newline at end of file
diff --git a/util/common.h b/util/common.h
index caa52a3..3e756eb 100644
--- a/util/common.h
+++ b/util/common.h
@@ -28,6 +28,8 @@
 #error "Only the x86_64 and aarch64 architecture are supported."
 #endif
 
+static bool useBpf = true;
+
 const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/";
 const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/";
-- 
Gitee

From 679f33344763d1d81555931eb1f6e9109feddd49 Mon Sep 17 00:00:00 2001
From: wuying39 <921169248@qq.com>
Date: Thu, 31 Jul 2025 14:30:09 +0800
Subject: [PATCH 2/6] revise bpf

---
 build/common.sh               |  4 +-
 pmu/bpf/perf_counter_bpf.cpp  | 64 +++++++++--------------
 pmu/bpf/sched_cgroup.bpf.c    | 97 +++++++++++------------------------
 pmu/bpf/sched_counter.bpf.c   | 27 ++++------
 pmu/pmu.cpp                   | 27 +++-------
 pmu/pmu_event.h               |  1 +
 pmu/pmu_list.cpp              | 54 +++++++++----------
 pmu/pmu_list.h                | 18 ++++---
 test/test_perf/CMakeLists.txt |  3 +-
 util/common.h                 |  9 ++--
 10 files changed, 118 insertions(+), 186 deletions(-)

diff --git a/build/common.sh b/build/common.sh
index d1fc3f1..6dc24f3 100644
--- a/build/common.sh
+++ b/build/common.sh
@@ -103,8 +103,8 @@ function build_skel_files() {
     local bpf_file_dir=$1
     local bpf_lib_dir=$2
 
-    bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}/vmlinux.h"
-    if [ -s "${bpf_lib_dir}vmlinux.h" ]; then
+    bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h"
+    if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then
         echo "The kernel header file generated : $(wc -l < "${VMLINUX_H}")"
     else
         echo "Generate vmlinux.h file failed."
diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp
index 03f2f12..ae9714b 100644
--- a/pmu/bpf/perf_counter_bpf.cpp
+++ b/pmu/bpf/perf_counter_bpf.cpp
@@ -183,7 +183,7 @@ int KUNPENG_PMU::PerfBpfCounter::Read(EventData &eventData)
 }
 
 int KUNPENG_PMU::PerfBpfCounter::ReadCgroup(std::vector<PmuData> &data) {
-    auto cgrpName = fdCgrp[pid];
+    auto cgrpName = this->evt->cgroupName;
     if (readCgroups.find(cgrpName) != readCgroups.end()) {
         return SUCCESS;
     }
@@ -212,6 +212,7 @@ int KUNPENG_PMU::PerfBpfCounter::ReadCgroup(std::vector<PmuData> &data) {
         current.countPercent = values[i].running / values[i].enabled;
         current.cpu = i;
         current.tid = this->pid;
+        current.cgroupName = this->evt->cgroupName.c_str();
     }
 
     memset(values, 0, cpuNums * sizeof(bpf_perf_event_value));
@@ -244,40 +245,19 @@ int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent()
 
     // initialize the cumulative pmu count for this pid
     struct bpf_perf_event_value evtVal[MAX_CPU_NUM];
-    if (!this->evt->cgroupName.empty()){
-        string cgroupPath = GetCgroupPath(this->evt->cgroupName);
-        auto pids = GetCgroupPids(cgroupPath);
-        for (pid_t pid : pids) {
-            memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
-            int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
-                sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
-            if(err){
-                printf("failed to bpf_map__update_elem event value in libbpf_print_fn. err: d \n", err);
-                return -1;
-            }
-
-            // initialize the filter, build the map relationship of pid and accum_key
-            err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
-            if(err){
-                printf("failed to bpf_map__update_elem filter. err: %d \n", err);
-                return -1;
-            }
-        }
-    } else {
-        memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
-        int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
-            sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
-        if(err){
-            printf("failed to bpf_map__update_elem event value in libbpf_print_fn. err: d \n", err);
-            return -1;
-        }
+    memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value));
+    int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal,
+        sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem event value in InitPidForEvent. err: %d\n", err);
+        return -1;
+    }
 
-        // initialize the filter, build the map relationship of pid and accum_key
-        err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
-        if(err){
-            printf("failed to bpf_map__update_elem filter. err: %d \n", err);
-            return -1;
-        }
+    // initialize the filter, build the map relationship of pid and accum_key
+    err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST);
+    if(err){
+        printf("failed to bpf_map__update_elem filter. err: %d\n", err);
+        return -1;
     }
 
     printf("InitPidForEvent: %d\n", pid);
@@ -364,6 +344,7 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj()
     }
 
     obj->rodata->num_cpus = MAX_CPU_NUM;
+    obj->rodata->num_events = this->evt->numEvent;
 
     err = bpf_map__set_max_entries(obj->maps.events, MAX_ENTITES);
     if(err){
@@ -395,12 +376,8 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj()
         return -1;
     }
 
-    if (CheckCgroupV2()) {
-        obj->bss->use_cgroup_v2 = 1;
-    }
-
     err = sched_cgroup_bpf__attach(obj);
-    if(err){
+    if(err) {
         printf("failed to bpf attach");
         return -1;
     }
@@ -415,8 +392,15 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj()
         evtIdxMap[evt->name] = evtIdx;
         evtIdx++;
     }
+    int evtKey = evtIdxMap[evt->name] * MAX_CPU_NUM + cpu;
+    err = bpf_map__update_elem(cgrpCounter->maps.events, &evtKey, sizeof(__u32),
+        &this->fd, sizeof(int), BPF_ANY);
+    if(err){
+        printf("failed to update elem. 
err %s cpu %d fd %d\n", strerror(-err), cpu, fd); + return -1; + } - string cgrpName = fdCgrp[this->pid]; + string cgrpName = this->evt->cgroupName; auto findCgrp = cgroups.find(cgrpName); if(findCgrp == cgroups.end()) { uint64_t cgrpId = ReadCgroupId(cgrpName); diff --git a/pmu/bpf/sched_cgroup.bpf.c b/pmu/bpf/sched_cgroup.bpf.c index accfafe..08183da 100644 --- a/pmu/bpf/sched_cgroup.bpf.c +++ b/pmu/bpf/sched_cgroup.bpf.c @@ -51,10 +51,7 @@ struct { const volatile __u32 num_events = 1; const volatile __u32 num_cpus = 1; -int enabled = 0; -int use_cgroup_v2 = 0; - -static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) +static inline int get_cgroup_idx(__u32 *cgrps, int size) { struct task_struct *p = (void *)bpf_get_current_task(); struct cgroup *cgrp; @@ -65,6 +62,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup); level = BPF_CORE_READ(cgrp, level); + bpf_printk("perf_event_cgrp_id %d cgrp_id %d level %d", perf_event_cgrp_id, cgrp, level); for (cnt = 0; i < MAX_LEVELS; i++) { __u64 cgrp_id; @@ -80,9 +78,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) bpf_printk("cannot get ancestor_ids"); return 0; } - if (cgrp_id==33) { - bpf_printk("print before level: %d cgrp id: %ld\n", i ,cgrp_id); - } + bpf_printk("cgrp_id %d level %d", cgrp_id, level); elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); if (!elem) { continue; @@ -93,35 +89,10 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) break; } } - + bpf_printk("cnt %d", cnt); return cnt; } -static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) -{ - // register int i = 0; - // __u32 *elem; - // int cnt; - - // for (cnt = 0; i < MAX_LEVELS; i++) { - // __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); - - // if (cgrp_id == 0) - // break; - - // // convert cgroup-id to a map index - // elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); - // if (!elem) - // continue; - - // cgrps[cnt++] = *elem; - // if (cnt == size) - // break; - // } - - return 0; -} - static int bperf_cgroup_count(void) { register __u32 idx = 0; // to have it in a register to pass BPF verifier @@ -133,13 +104,7 @@ static int bperf_cgroup_count(void) __u32 key, cgrp; long err; - if (use_cgroup_v2) { - bpf_printk("-------cgroup v2--------------"); - cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); - } else { - bpf_printk("-------cgroup v1--------------"); - cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); - } + cgrp_cnt = get_cgroup_idx(cgrp_idx, MAX_LEVELS); for ( ; idx < MAX_EVENTS; idx++) { bpf_printk("idx: %d num_events: %d", idx, num_events); @@ -161,7 +126,7 @@ static int bperf_cgroup_count(void) return 0; } } - + bpf_printk("prev_val counting: %ld prev_val enabled: %ld\n", prev_val->counter, prev_val->enabled); // read from global perf_event array key = idx * num_cpus + cpu; bpf_printk("key: %d", key); @@ -171,34 +136,30 @@ static int bperf_cgroup_count(void) continue; } - if (enabled) { - delta.counter = val.counter - prev_val->counter; - delta.enabled = val.enabled - prev_val->enabled; - delta.running = val.running - prev_val->running; - - for (c = 0; c < MAX_LEVELS; c++) { - if (c == cgrp_cnt) - break; - - cgrp = cgrp_idx[c]; - bpf_printk("c: %d cgrp: %d", c, cgrp); - // aggregate the result by cgroup - key = cgrp * num_events + idx; - cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key); - if (cgrp_val) { - cgrp_val->counter += delta.counter; - cgrp_val->enabled += delta.enabled; - cgrp_val->running += delta.running; - 
bpf_printk("cgrp_val counting: %ld cgrp_val counting: %ld\n", cgrp_val->counter, cgrp_val->counter); - } else { - bpf_printk("!cgrp_val"); - bpf_map_update_elem(&cgrp_readings, &key, - &delta, BPF_ANY); - } + delta.counter = val.counter - prev_val->counter; + delta.enabled = val.enabled - prev_val->enabled; + delta.running = val.running - prev_val->running; + bpf_printk("val counting: %ld val enabled: %ld\n", val.counter, val.enabled); + bpf_printk("delta counting: %ld delta enabled: %ld\n", delta.counter, delta.enabled); + for (c = 0; c < MAX_LEVELS; c++) { + if (c == cgrp_cnt) + break; + + cgrp = cgrp_idx[c]; + // aggregate the result by cgroup + key = cgrp * num_events + idx; + bpf_printk("c: %d cgrp: %d key: %d", c, cgrp, key); + cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key); + if (cgrp_val) { + cgrp_val->counter += delta.counter; + cgrp_val->enabled += delta.enabled; + cgrp_val->running += delta.running; + bpf_printk("cgrp_val counting: %ld cgrp_val counting: %ld\n", cgrp_val->counter, cgrp_val->counter); + } else { + bpf_printk("!cgrp_val"); + bpf_map_update_elem(&cgrp_readings, &key, + &delta, BPF_ANY); } - } else { - bpf_printk("not enabled"); - return 0; } *prev_val = val; diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c index 0bf418c..917cb01 100644 --- a/pmu/bpf/sched_counter.bpf.c +++ b/pmu/bpf/sched_counter.bpf.c @@ -1,21 +1,22 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +// Copyright (c) 2021 Google #include "bpf/vmlinux.h" #include "bpf/bpf_core_read.h" #include "bpf/bpf_tracing.h" #include "bpf/bpf_helpers.h" -char LICENSE[] SEC("license") = "GPL"; //license for kernel verification +char LICENSE[] SEC("license") = "GPL"; #define MAX_ENTRIES 102400 -// system pmu count. key: pid, value : count of each core struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); // map for perfbuffer + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(key_size, sizeof(__u32)); __uint(value_size, sizeof(int)); __uint(map_flags, BPF_F_PRESERVE_ELEMS); } events SEC(".maps"); -//system pmu count at last time sched_switch was triggered struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -23,8 +24,6 @@ struct { __uint(max_entries, 1); } prev_readings SEC(".maps"); -// accumulated pmu count of pid. key: accum_key, value: count of each core -// If the pid spawns a child process/thread, both use the same accum key and their pmu events sum under this pid's profile struct { __uint(type, BPF_MAP_TYPE_PERCPU_HASH); __uint(key_size, sizeof(__u32)); @@ -32,7 +31,6 @@ struct { __uint(max_entries, 1024); } accum_readings SEC(".maps"); -// check whether to record pmu value. 
key: pid, value: accum_key struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); @@ -43,11 +41,10 @@ struct { SEC("raw_tp/sched_switch") int BPF_PROG(on_switch) { - bpf_printk("----------------sched_switch------------"); __u32 pid; - __u32 zero = 0; + __u32 zero=0; __u32 *accum_key; - __u32 cpu = bpf_get_smp_processor_id(); // get current cpu + __u32 cpu = bpf_get_smp_processor_id(); long err; struct bpf_perf_event_value cur_val, *prev_val, *accum_val; @@ -57,25 +54,19 @@ int BPF_PROG(on_switch) { return 0; } - // get pmu value by API of bpf err = bpf_perf_event_read_value(&events, BPF_F_CURRENT_CPU, &cur_val, sizeof(struct bpf_perf_event_value)); if(err){ bpf_printk("failed to bpf_event_read_value: %d cpu %d\n", err, cpu); return 0; } - - pid = bpf_get_current_pid_tgid() & 0xffffffff; // get current pid - bpf_printk("bpf_get_current_pid_tgid: %ld pid: %d cpu: %d\n", bpf_get_current_pid_tgid(), pid, cpu); - + pid = bpf_get_current_pid_tgid() & 0xffffffff; accum_key = bpf_map_lookup_elem(&filter, &pid); - if (!accum_val) { - bpf_printk("!accum_key\n"); + if (!accum_key) { return 0; } accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); if (!accum_val) { - bpf_printk("!accum_val\n"); *prev_val = cur_val; return 0; } diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 8784d37..3389a54 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -335,8 +335,6 @@ static void CopyAttrData(PmuAttr* newAttr, PmuAttr* inputAttr, enum PmuTaskType } } -std::map fdCgrp; - static bool FreeEvtAttr(struct PmuAttr *attr) { if (attr->evtAttr == nullptr) { @@ -909,23 +907,13 @@ int GetCgroupFd(std::string& cgroupName) { static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int groupId, const char* cgroupName, int cgroupFd) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); - - if(attr->cgroupNameList) { - taskParam->numPid = 1; - taskParam->pidList = new int[1]; - string cgroupNameStr = cgroupName; - taskParam->pidList[0] = GetCgroupFd(cgroupNameStr); - fdCgrp[taskParam->pidList[0]] = cgroupNameStr; - std::cout<<"pid: "<< taskParam->pidList[0]<< "cgroupName: "<numPid = attr->numPid; - taskParam->pidList = new int[attr->numPid]; - for (int i = 0; i < attr->numPid; i++) { - taskParam->pidList[i] = attr->pidList[i]; - } + /** + * Assign pids to collect + */ + taskParam->numPid = attr->numPid; + taskParam->pidList = new int[attr->numPid]; + for (int i = 0; i < attr->numPid; i++) { + taskParam->pidList[i] = attr->pidList[i]; } PmuEvt* pmuEvt = nullptr; @@ -968,6 +956,7 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att if (cgroupName != nullptr) { taskParam->pmuEvt->cgroupName = cgroupName; } + taskParam->pmuEvt->numEvent = attr->numEvt; return taskParam.release(); } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 557b7a8..88aed9d 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -51,6 +51,7 @@ struct PmuEvt { unsigned includeNewFork : 1; // count new fork tid int cgroupFd; std::string cgroupName; + unsigned numEvent; }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index f41aed6..b046cc1 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -151,16 +151,16 @@ namespace KUNPENG_PMU { fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); needBytesNum += PredictRequiredMemory(taskParam->pmuEvt->collectType, cpuTopoList.size(), procTopoList.size()); - if(useBpf){ + #ifdef BPF_ENABLED std::shared_ptr evtList = - 
std::make_shared(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); - InsertBpfEvtList(pd, evtList); - }else { + std::make_shared(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); + InsertBpfEvtList(pd, evtList); + #else std::shared_ptr evtList = std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); InsertEvtList(pd, evtList); - } + #endif pmuTaskAttrHead = pmuTaskAttrHead->next; } @@ -194,6 +194,7 @@ namespace KUNPENG_PMU { return SUCCESS; } +#ifdef BPF_ENABLED int PmuList::EvtBpfInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList, bool isMemoryEnough) { auto err = evtList->Init(groupEnable, evtLeader, isMemoryEnough); @@ -203,11 +204,24 @@ namespace KUNPENG_PMU { return SUCCESS; } + void PmuList::InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList) + { + lock_guard lg(pmuListMtx); + pmuBpfList[pd].push_back(evtList); + } + + std::vector>& PmuList::GetBpfEvtList(const unsigned pd) + { + lock_guard lg(pmuListMtx); + return pmuBpfList[pd]; + } +#endif + int PmuList::Init(const int pd, bool isMemoryEnough) { std::unordered_map eventGroupInfoMap; - if(useBpf) { + #ifdef BPF_ENABLED for (auto& evtList : GetBpfEvtList(pd)) { auto err = EvtBpfInit(false, nullptr, pd, evtList, isMemoryEnough); if (err != SUCCESS) { @@ -215,7 +229,7 @@ namespace KUNPENG_PMU { } continue; } - } + #endif for (auto& evtList : GetEvtList(pd)) { if (evtList->GetGroupId() == -1) { @@ -270,7 +284,7 @@ namespace KUNPENG_PMU { int PmuList::Start(const int pd) { - if(useBpf) { + #ifdef BPF_ENABLED auto pmuList = GetBpfEvtList(pd); for (auto item: pmuList) { auto err = item->Start(); @@ -278,7 +292,7 @@ namespace KUNPENG_PMU { return err; } } - } else { + #else auto pmuList = GetEvtList(pd); for (auto item: pmuList) { auto err = item->Start(); @@ -286,8 +300,7 @@ namespace KUNPENG_PMU { return err; } } - } - + #endif return SUCCESS; } @@ -440,7 +453,7 @@ namespace KUNPENG_PMU { evtData.pd = pd; evtData.collectType = static_cast(GetTaskType(pd)); auto ts = GetCurrentTime(); - if(useBpf) { + #ifdef BPF_ENABLED auto eventList = GetBpfEvtList(pd); for (auto item: eventList) { item->SetTimeStamp(ts); @@ -449,7 +462,7 @@ namespace KUNPENG_PMU { return err; } } - } else { + #else auto eventList = GetEvtList(pd); for (auto item: eventList) { item->SetTimeStamp(ts); @@ -458,8 +471,7 @@ namespace KUNPENG_PMU { return err; } } - } - + #endif return SUCCESS; } @@ -626,18 +638,6 @@ namespace KUNPENG_PMU { return pmuList[pd]; } - void PmuList::InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList) - { - lock_guard lg(pmuListMtx); - pmuBpfList[pd].push_back(evtList); - } - - std::vector>& PmuList::GetBpfEvtList(const unsigned pd) - { - lock_guard lg(pmuListMtx); - return pmuBpfList[pd]; - } - void PmuList::EraseEvtList(const unsigned pd) { lock_guard lg(pmuListMtx); diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 30a7091..b5e7dc8 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -23,8 +23,9 @@ #include "evt_list.h" #include "pmu_event.h" #include "evt_list_default.h" -#include "bpf/evt_list_bpf.h" - +#ifdef BPF_ENABLED + #include "bpf/evt_list_bpf.h" +#endif namespace KUNPENG_PMU { struct PmuTaskAttr { @@ -94,17 +95,20 @@ private: void InsertEvtList(const unsigned pd, std::shared_ptr evtList); std::vector>& GetEvtList(const unsigned pd); - void InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList); - std::vector>& GetBpfEvtList(const 
unsigned pd); - void EraseEvtList(const unsigned pd); void EraseParentEventMap(const unsigned pd); void EraseSymModeList(const unsigned pd); void ErasePpidList(const unsigned pd); int EvtDefaultInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList, bool isMemoryEnough); + +#ifdef BPF_ENABLED + void InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList); + std::vector>& GetBpfEvtList(const unsigned pd); int EvtBpfInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList, bool isMemoryEnough); - + std::unordered_map>> pmuBpfList; +#endif + int Init(const int pd, bool isMemoryEnough); void InsertDataEvtGroupList(const unsigned pd, groupMapPtr evtGroupList); @@ -147,7 +151,7 @@ private: static std::mutex dataEvtGroupListMtx; static std::mutex dataParentMtx; std::unordered_map>> pmuList; - std::unordered_map>> pmuBpfList; + // Key: pd // Value: PmuData List. // PmuData is stored here before user call . diff --git a/test/test_perf/CMakeLists.txt b/test/test_perf/CMakeLists.txt index 1eb6f63..e28f490 100644 --- a/test/test_perf/CMakeLists.txt +++ b/test/test_perf/CMakeLists.txt @@ -4,11 +4,12 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/pfm) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/analyzer/metric) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/decoder) +include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/bpf) include_directories(${PROJECT_TOP_DIR}/include) add_compile_options(-g) set(CMAKE_CXX_STANDARD 14) aux_source_directory(. SOURCE_SRC) add_executable(test_perf ${SOURCE_SRC} ${CMAKE_CURRENT_LIST_DIR}/../../util/pcerr.cpp) -target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread -g) +target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread bpf -g) add_subdirectory(case) diff --git a/util/common.h b/util/common.h index c395f0b..365ad78 100644 --- a/util/common.h +++ b/util/common.h @@ -29,12 +29,13 @@ #error "Only the x86_64 and aarch64 architecture are supported." 
#endif -static bool useBpf = false; -#ifdef USE_BPF - useBpf = true; +static bool useBpf = +#ifdef BPF_ENABLED + true; +#else + false; #endif -extern std::map<int, std::string> fdCgrp; #define CGROUP2_SUPER_MAGIC 0x63677270 const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; -- Gitee From fa87de82fc4798581926764b2e74be07f7e6b90e Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 6 Aug 2025 10:35:56 +0800 Subject: [PATCH 3/6] bpf dynamic --- include/pmu.h | 2 ++ pmu/bpf/evt_list_bpf.h | 5 +++++ pmu/pmu.cpp | 4 ++-- pmu/pmu_event.h | 1 + pmu/pmu_list.cpp | 16 ++++++---------- util/common.h | 7 ------- 6 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/pmu.h b/include/pmu.h index 89aed63..cf2d858 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -176,6 +176,8 @@ struct PmuAttr { // enable user access counting for current process unsigned enableUserAccess : 1; + // enable bpf collecting for counting mode + unsigned useBpf : 1; }; enum PmuTraceType { diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h index 482c37d..10ae130 100644 --- a/pmu/bpf/evt_list_bpf.h +++ b/pmu/bpf/evt_list_bpf.h @@ -66,6 +66,11 @@ public: return pmuEvt->collectType; } + int GetUseBpf() const + { + return pmuEvt->useBpf; + } + private: using PerfEvtPtr = std::shared_ptr; std::unordered_map procMap; diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 8858db0..2442e85 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -913,8 +913,8 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt for(int i = 0; i < pmuEvt->cpuMaskList.size(); i++) { taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } - } else if (attr->cpuList == nullptr && (attr->pidList != nullptr || attr->cgroupNameList) && pmuEvt->collectType == COUNTING) { - if(useBpf || attr->cgroupNameList) { + } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { //(attr->pidList != nullptr || attr->cgroupNameList) + if(attr->useBpf) { //|| attr->cgroupNameList taskParam->numCpu = MAX_CPU_NUM; taskParam->cpuList = new int[MAX_CPU_NUM]; for(int i = 0; i < MAX_CPU_NUM; i++) { diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 520d057..f9dbbdf 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -52,6 +52,7 @@ struct PmuEvt { int cgroupFd; std::string cgroupName; unsigned numEvent; + unsigned useBpf : 1; // bpf mode for counting }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 9d59dad..4695352 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -88,7 +88,7 @@ namespace KUNPENG_PMU { fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); #ifdef BPF_ENABLED - if (taskParam->pmuEvt->collectType == COUNTING) { + if (taskParam->pmuEvt->collectType == COUNTING && taskParam->pmuEvt->useBpf) { std::shared_ptr<EvtBpfList> evtList = std::make_shared<EvtBpfList>(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); InsertBpfEvtList(pd, evtList); @@ -1023,10 +1023,8 @@ namespace KUNPENG_PMU { int masterPid = pmuTaskAttrHead->pidList[i]; int numChild = 0; int* childTidList = GetChildTid(masterPid, &numChild); - if (!useBpf || pmuTaskAttrHead->pmuEvt->cgroupName.empty()) { - if (childTidList == nullptr) { - return LIBPERF_ERR_INVALID_PID; - } + if (childTidList == nullptr) { + return LIBPERF_ERR_INVALID_PID; } bool foundProc = false; for (int j = 0; j < numChild; j++) { @@ -1041,11 +1039,9 @@ namespace KUNPENG_PMU { procTopoList.emplace_back(shared_ptr<ProcTopology>(procTopo, FreeProcTopo)); } delete[] childTidList; - if (!useBpf || 
pmuTaskAttrHead->pmuEvt->cgroupName.empty()) { - if (!foundProc) { - New(LIBPERF_ERR_FAIL_GET_PROC, "process not found: " + std::to_string(pmuTaskAttrHead->pidList[i])); - return LIBPERF_ERR_FAIL_GET_PROC; - } + if (!foundProc) { + New(LIBPERF_ERR_FAIL_GET_PROC, "process not found: " + std::to_string(pmuTaskAttrHead->pidList[i])); + return LIBPERF_ERR_FAIL_GET_PROC; } } return SUCCESS; diff --git a/util/common.h b/util/common.h index 78e1584..98dec4e 100644 --- a/util/common.h +++ b/util/common.h @@ -31,13 +31,6 @@ #error "Only the x86_64, aarch64, and riscv64 architectures are supported." #endif -static bool useBpf = -#ifdef BPF_ENABLED - true; -#else - false; -#endif - #define CGROUP2_SUPER_MAGIC 0x63677270 const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; -- Gitee From f7051b75e8cf1363ba00ffcf56f8609dc21d4066 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 6 Aug 2025 15:07:37 +0800 Subject: [PATCH 4/6] revise --- build/common.sh | 2 +- pmu/bpf/evt_list_bpf.cpp | 95 ++++++++++++-------------- pmu/bpf/evt_list_bpf.h | 13 ++-- pmu/bpf/perf_counter_bpf.cpp | 129 ++++++++++++----------------------- pmu/bpf/perf_counter_bpf.h | 25 ++----- pmu/bpf/sched_cgroup.bpf.c | 13 ++-- pmu/bpf/sched_counter.bpf.c | 8 +-- pmu/perf_counter.h | 1 + pmu/perf_counter_default.cpp | 2 +- 9 files changed, 110 insertions(+), 178 deletions(-) diff --git a/build/common.sh b/build/common.sh index 6dc24f3..4f725fc 100644 --- a/build/common.sh +++ b/build/common.sh @@ -105,7 +105,7 @@ function build_skel_files() { local bpf_lib_dir=$2 bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h" if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then - echo "The kernel header file generated : $(wc -l < "${VMLINUX_H}")" + echo "The kernel header file generated." else echo "Generate vmlinux.h file failed." 
fi diff --git a/pmu/bpf/evt_list_bpf.cpp b/pmu/bpf/evt_list_bpf.cpp index 957ea19..7e1cc0b 100644 --- a/pmu/bpf/evt_list_bpf.cpp +++ b/pmu/bpf/evt_list_bpf.cpp @@ -39,11 +39,7 @@ int KUNPENG_PMU::EvtBpfList::CollectorDoTask(PerfEvtPtr collector, int task) case RESET: return collector->Reset(); case CLOSE: { - auto ret = collector->Close(); - if (ret == SUCCESS) { - fdList.erase(collector->GetFd()); - } - return ret; + return collector->Close(); } default: return UNKNOWN_ERROR; @@ -58,41 +54,44 @@ int KUNPENG_PMU::EvtBpfList::Init(const bool groupEnable, const std::shared_ptr< procMap[proc->tid] = proc; } } - for (unsigned int row = 0; row < numCpu; row++) { + + for (unsigned int cpu = 0; cpu < numCpu; cpu++) { int resetOutPutFd = -1; - std::vector<PerfEvtPtr> evtVec{}; PerfEvtPtr perfEvt; - for(unsigned int col=0;col<numPid;col++){ - perfEvt = std::make_shared<PerfBpfCounter>(this->cpuList[row]->coreId, this->pidList[col]->tid, this->pmuEvt.get(), procMap); - if (perfEvt == nullptr) { - continue; - } - - if (!evtVec.empty()) { - resetOutPutFd = evtVec[0]->GetFd(); - } - - int err = 0; - err = perfEvt->Init(groupEnable, -1, resetOutPutFd); - fdList.insert(perfEvt->GetFd()); - evtVec.emplace_back(perfEvt); + + perfEvt = std::make_shared<PerfBpfCounter>(this->cpuList[cpu]->coreId, -1, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; } - this->xyCounterArray.emplace_back(evtVec); - this->xyCounterArray[row].emplace_back(perfEvt); + int err = 0; + err = perfEvt->Init(groupEnable, -1, resetOutPutFd); + this->cpuCounterArray.emplace_back(perfEvt); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + int resetOutPutFd = -1; + PerfEvtPtr perfEvt; + + perfEvt = std::make_shared<PerfBpfCounter>(-1, this->pidList[pid]->tid, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + int err = 0; + err = perfEvt->Init(groupEnable, -1, resetOutPutFd); + this->pidCounterArray.emplace_back(perfEvt); } return SUCCESS; } -int KUNPENG_PMU::EvtBpfList::CollectorXYArrayDoTask(std::vector<std::vector<PerfEvtPtr>>& xyArray, int task) +int KUNPENG_PMU::EvtBpfList::CollectorTaskArrayDoTask(std::vector<PerfEvtPtr>& taskArray, int task) { std::unique_lock<std::mutex> lock(mutex); - for (auto row: xyArray) { - for (auto evt: row) { - auto err = CollectorDoTask(evt, task); - if (err != SUCCESS) { - return err; - } + for (auto evt: taskArray) { - auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; } } this->prevStat = this->evtStat; @@ -102,22 +101,22 @@ int KUNPENG_PMU::EvtBpfList::CollectorXYArrayDoTask(std::vector<std::vector<PerfEvtPtr>>& xyArray, int task) int KUNPENG_PMU::EvtBpfList::Start() { - return CollectorXYArrayDoTask(this->xyCounterArray, START); + return CollectorTaskArrayDoTask(this->cpuCounterArray, START); } int KUNPENG_PMU::EvtBpfList::Enable() { - return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); + return CollectorTaskArrayDoTask(this->cpuCounterArray, ENABLE); } int KUNPENG_PMU::EvtBpfList::Stop() { - return CollectorXYArrayDoTask(this->xyCounterArray, STOP); + return CollectorTaskArrayDoTask(this->cpuCounterArray, STOP); } int KUNPENG_PMU::EvtBpfList::Close() { - auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); + auto ret = CollectorTaskArrayDoTask(this->cpuCounterArray, CLOSE); if (ret != SUCCESS) { return ret; } @@ -130,38 +129,34 @@ int KUNPENG_PMU::EvtBpfList::Read(EventData &eventData) { std::unique_lock<std::mutex> lg(mutex); - for (unsigned int row = 0; row < numCpu; row++) { - for (unsigned int col = 0; col < numPid; col++) { - int err = this->xyCounterArray[row][col]->BeginRead(); - if (err != SUCCESS) { - return err; - } + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->BeginRead(); + if (err != SUCCESS) { + return err; } } struct 
PmuEvtData* head = nullptr; int row = 0; auto cpuTopo = this->cpuList[row].get(); - for (unsigned int col = 0; col < numPid; col++) { + for (unsigned int pid = 0; pid < numPid; pid++) { auto cnt = eventData.data.size(); - int err = this->xyCounterArray[row][col]->Read(eventData); + int err = this->pidCounterArray[pid]->Read(eventData); if (err != SUCCESS) { return err; } if (eventData.data.size() - cnt) { - DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[col]->pid, + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[pid]->pid, cpuTopo->coreId, eventData.data.size() - cnt); } // Fill event name and cpu topology. - FillFields(cnt, eventData.data.size(), cpuTopo, pidList[col].get(), eventData.data); + FillFields(cnt, eventData.data.size(), cpuTopo, pidList[pid].get(), eventData.data); } - for (unsigned int row = 0; row < numCpu; row++) { - for (unsigned int col = 0; col < numPid; col++) { - int err = this->xyCounterArray[row][col]->EndRead(); - if (err != SUCCESS) { - return err; - } + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->EndRead(); + if (err != SUCCESS) { + return err; } } diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h index ae6caaf..2962892 100644 --- a/pmu/bpf/evt_list_bpf.h +++ b/pmu/bpf/evt_list_bpf.h @@ -30,6 +30,7 @@ #include "spe_sampler.h" #include "evt_list.h" #include "evt_list_default.h" + namespace KUNPENG_PMU { class EvtBpfList : public EvtList { @@ -66,29 +67,23 @@ public: return pmuEvt->collectType; } - int GetUseBpf() const - { - return pmuEvt->useBpf; - } - private: using PerfEvtPtr = std::shared_ptr; std::unordered_map procMap; std::vector cpuList; std::vector pidList; - std::vector>> xyCounterArray; + std::vector> cpuCounterArray; + std::vector> pidCounterArray; std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); std::shared_ptr pmuEvt; int CollectorDoTask(PerfEvtPtr collector, int task); - int CollectorXYArrayDoTask(std::vector>& xyArray, int task); + int CollectorTaskArrayDoTask(std::vector& taskArray, int task); - std::set fdList; unsigned int numCpu = 0; unsigned int numPid = 0; int64_t ts = 0; int prevStat; int evtStat; - std::mutex mutex; }; } // namespace KUNPENG_PMU diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp index aa46730..b03aac5 100644 --- a/pmu/bpf/perf_counter_bpf.cpp +++ b/pmu/bpf/perf_counter_bpf.cpp @@ -42,22 +42,19 @@ using namespace pcerr; #define MAX_ENTITES 1024 static map counterMap; // key: evt name, value: bpf obj +static struct sched_cgroup_bpf *cgrpCounter = nullptr; // one bpf obj in cgroup mode static map bpfFdMap; // key: evt name, value: bpf prog fd (tracepoint id). For tracepoint trigger static map> evtCpuMap; // key: evt name, value: core id (Init). static map> evtPidMap; // key: evt name, value: pid (Init). static map> evtCgroupMap; // key: evt name, value: cgroup id (Init). 
- +static map<string, int> evtIdxMap; // key: evt name, value: sequential number +static map<string, int> cgroups; // key: cgroup name, value: sequential number +static set<string> readCgroups; static set<string> triggerdEvt; // triggered evt name - -static struct sched_cgroup_bpf *cgrpCounter = nullptr; -static map<string, int> evtIdxMap; static int evtIdx = 0; -static map<string, int> cgroups; -static set<string> readCgroups; static int cgrpProgFd = 0; - static inline int TriggeredRead(int prog_fd, int cpu) { //enforce the bpf trace function @@ -83,28 +80,7 @@ int KUNPENG_PMU::PerfBpfCounter::EndRead() return SUCCESS; } -static std::vector<pid_t> GetCgroupPids(const std::string& cgroup_path) { - std::vector<pid_t> pids; - std::ifstream file(cgroup_path + "/cgroup.procs"); - - if (!file.is_open()) { - std::cout<<"cannot cgroup.procs: " << cgroup_path <<std::endl; - return pids; - } - - pid_t pid; - while (file >> pid) { - pids.push_back(pid); - file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); - } - - if (!file.eof() && file.fail()) { - std::cout<<"read file cgroup.procs failed: " << cgroup_path <<std::endl; - } - - return pids; -} -int KUNPENG_PMU::PerfBpfCounter::ReadBpf(std::vector<PmuData> &data) +int KUNPENG_PMU::PerfBpfCounter::ReadBpfProcess(std::vector<PmuData> &data) { const unsigned cpuNums = MAX_CPU_NUM; auto obj = counterMap[this->evt->name]; @@ -124,27 +100,10 @@ int KUNPENG_PMU::PerfBpfCounter::ReadBpf(std::vector<PmuData> &data) // read the pmu count of this pid in each cpu core struct bpf_perf_event_value values[cpuNums]; - int pid = this->pid; - int err; - - //cgroup support - string cgroupPath = "/sys/fs/cgroup/"; - if (!this->evt->cgroupName.empty()) { - string cgroupPath = GetCgroupPath(this->evt->cgroupName); - auto pids = GetCgroupPids(cgroupPath); - for (pid_t pid : pids) { - err = bpf_map__lookup_elem(obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); - if(err) { - printf("failed to loopup elem. err: %s pid %d\n", strerror(-err), pid); - return SUCCESS; - } - } - } else { - err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); - if(err) { - printf("failed to loopup elem. err: %s pid %d\n", strerror(-err), this->pid); - return SUCCESS; - } + int err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if(err) { + printf("failed to lookup elem. 
err: %s pid %d\n", strerror(-err), this->pid); + return SUCCESS; } // convert pmu count to PmuData @@ -153,6 +112,7 @@ int KUNPENG_PMU::PerfBpfCounter::ReadBpf(std::vector &data) if(findProc != procMap.end()){ processId = findProc->second->pid; } + for(int i = 0; i < cpuNums; i++){ data.emplace_back(PmuData{0}); auto ¤t = data.back(); @@ -178,7 +138,7 @@ int KUNPENG_PMU::PerfBpfCounter::Read(EventData &eventData) if (!evt->cgroupName.empty()) { return ReadBpfCgroup(eventData.data); } else { - return ReadBpf(eventData.data); + return ReadBpfProcess(eventData.data); } } @@ -229,10 +189,12 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } - - int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent() { + if (this->pid == -1) { + return SUCCESS; + } + if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) { return SUCCESS; } @@ -269,7 +231,6 @@ int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent() int KUNPENG_PMU::PerfBpfCounter::InitBpfObj() { int err; - int map_fd; struct sched_counter_bpf *obj; auto findObj = counterMap.find(evt->name); if(findObj == counterMap.end()){ @@ -335,6 +296,24 @@ int KUNPENG_PMU::PerfBpfCounter::InitBpfObj() return 0; } +static int ReadCgroupId(const string &cgroupName) +{ + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + string fullCgroupPath = GetCgroupPath(cgroupName); + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { + return -1; + } + + return handle.cgroup_id; +} + int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj() { int err; @@ -425,9 +404,11 @@ int KUNPENG_PMU::PerfBpfCounter::Init(const bool groupEnable, const int groupFd, InitPidForEvent(); auto findCpuMap = evtCpuMap.find(this->evt->name); auto findCgroupMap = evtCgroupMap.find(this->evt->name); - if(findCpuMap != evtCpuMap.end() && findCpuMap->second.find(this->cpu) != findCpuMap->second.end() && findCgroupMap != evtCgroupMap.end() && findCgroupMap->second.find(this->evt->cgroupName) != findCgroupMap->second.end()) { + if(findCpuMap != evtCpuMap.end() && findCpuMap->second.count(this->cpu) && findCgroupMap != evtCgroupMap.end() + && findCgroupMap->second.count(this->evt->cgroupName)) { return SUCCESS; - } else if(findCgroupMap == evtCgroupMap.end()) { + } + if(findCgroupMap == evtCgroupMap.end()) { evtCgroupMap[this->evt->name].insert(this->evt->cgroupName); } int err = this->MapPerfAttr(groupEnable, groupFd); @@ -444,6 +425,9 @@ int KUNPENG_PMU::PerfBpfCounter::Init(const bool groupEnable, const int groupFd, int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) { + if (this->cpu == -1) { + return SUCCESS; + } struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(struct perf_event_attr); @@ -463,8 +447,7 @@ int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int g attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); - this->groupFd = groupFd; - printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { 
return MapErrno(errno); @@ -474,49 +457,27 @@ int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int g int KUNPENG_PMU::PerfBpfCounter::Enable() { - if (groupFd != -1) { - return SUCCESS; - } int err = PerfEvt::Enable(); if (err != SUCCESS) { return err; } - this->accumCount.clear(); - this->enabled = 0; - this->running = 0; return SUCCESS; } int KUNPENG_PMU::PerfBpfCounter::Disable() { - if (groupFd != -1) { - return SUCCESS; - } return PerfEvt::Disable(); } int KUNPENG_PMU::PerfBpfCounter::Reset() { - if (groupFd != -1) { - return SUCCESS; - } return PerfEvt::Reset(); } -int KUNPENG_PMU::PerfBpfCounter::ReadCgroupId(const string &cgroupName) +int KUNPENG_PMU::PerfBpfCounter::Close() { - char path[PATH_MAX + 1]; - char mnt[PATH_MAX + 1]; - struct { - struct file_handle fh; - uint64_t cgroup_id; - } handle; - int mount_id; - string fullCgroupPath = GetCgroupPath(cgroupName); - handle.fh.handle_bytes = sizeof(handle.cgroup_id); - if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { - return -1; + if (this->fd > 0) { + close(this->fd); } - - return handle.cgroup_id; + return SUCCESS; } \ No newline at end of file diff --git a/pmu/bpf/perf_counter_bpf.h b/pmu/bpf/perf_counter_bpf.h index ce1302c..74f1a34 100644 --- a/pmu/bpf/perf_counter_bpf.h +++ b/pmu/bpf/perf_counter_bpf.h @@ -37,34 +37,17 @@ namespace KUNPENG_PMU { int Enable() override; int Disable() override; int Reset() override; + int Close() override; int BeginRead(); int EndRead(); + private: - enum class GroupStatus - { - NO_GROUP, - GROUP_LEADER, - GROUP_MEMBER - }; - int CountValueToData(const __u64 value, const __u64 timeEnabled, - const __u64 timeRunning, __u64 &accumCount, std::vector &data); int InitBpfObj(); int InitBpfCgroupObj(); - int ReadBpf(std::vector &data); - int ReadBpfCgroup(std::vector &data); int InitPidForEvent(); - - // Accumulated pmu count, time enabled and time running. - __u64 count = 0; - __u64 enabled = 0; - __u64 running = 0; - // For group events, is the accum counts of all members. - // For normal events, has only one element. - std::vector<__u64> accumCount; - int groupFd = 0; - GroupStatus groupStatus = GroupStatus::NO_GROUP; - int ReadCgroupId(const std::string &cgroupName); + int ReadBpfProcess(std::vector &data); + int ReadBpfCgroup(std::vector &data); }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/bpf/sched_cgroup.bpf.c b/pmu/bpf/sched_cgroup.bpf.c index 5d9f705..d044db8 100644 --- a/pmu/bpf/sched_cgroup.bpf.c +++ b/pmu/bpf/sched_cgroup.bpf.c @@ -12,17 +12,14 @@ * Create: 2025-08-10 * Description: the bpf program for cgroup collecting in counting mode ******************************************************************************/ -#include "bpf/vmlinux.h" -#include "bpf/bpf_core_read.h" -#include "bpf/bpf_tracing.h" -#include "bpf/bpf_helpers.h" +#include +#include +#include +#include #define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary #define MAX_EVENTS 128 // max events per cgroup: arbitrary -// NOTE: many of map and global data will be modified before loading -// from the userspace (perf tool) using the skeleton helpers. 
- // single set of global perf events to measure // {evt0, cpu0}, {evt0, cpu1}, {evt0, cpu2}...{evt0, cpuM}, {evt1, cpu0}...{evtM, cpuM} struct { @@ -155,8 +152,8 @@ static int bperf_cgroup_count(void) for (c = 0; c < MAX_LEVELS; c++) { if (c == cgrp_cnt) break; - cgrp = cgrp_idx[c]; + cgrp = cgrp_idx[c]; + // aggregate the result by cgroup key = cgrp * num_events + idx; bpf_printk("c: %d cgrp: %d key: %d", c, cgrp, key); diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c index 4e4062b..891d813 100644 --- a/pmu/bpf/sched_counter.bpf.c +++ b/pmu/bpf/sched_counter.bpf.c @@ -12,10 +12,10 @@ * Create: 2025-08-10 * Description: the bpf program for ordinary or multi-thread program collecting in counting mode ******************************************************************************/ -#include "bpf/vmlinux.h" -#include "bpf/bpf_core_read.h" -#include "bpf/bpf_tracing.h" -#include "bpf/bpf_helpers.h" +#include <bpf/vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> char LICENSE[] SEC("license") = "GPL"; diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index b57a6ed..fe97363 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -34,6 +34,7 @@ namespace KUNPENG_PMU { virtual int Enable() = 0; virtual int Disable() = 0; virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/perf_counter_default.cpp b/pmu/perf_counter_default.cpp index e30fef0..25f2bb6 100644 --- a/pmu/perf_counter_default.cpp +++ b/pmu/perf_counter_default.cpp @@ -338,7 +338,7 @@ int KUNPENG_PMU::PerfDefaultCounter::MapPerfAttr(const bool groupEnable, const i groupStatus = GroupStatus::NO_GROUP; } this->groupFd = groupFd; - printf("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { return MapErrno(errno); -- Gitee From e6ba455c9e2addb34c2b6466bf74295f1400ed55 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 12 Aug 2025 15:18:52 +0800 Subject: [PATCH 5/6] Add exit analysis interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/evt.h | 2 -- pmu/evt_list.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pmu/evt.h b/pmu/evt.h index ce9c952..a268d00 100644 --- a/pmu/evt.h +++ b/pmu/evt.h @@ -89,8 +89,6 @@ public: return ""; } - std::string cgroupName; - protected: int fd; int cpu; diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 349ffa2..bc05bdf 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -37,6 +37,7 @@ public: virtual int Start() = 0; virtual int Stop() = 0; virtual int Read(EventData &eventData) = 0; + }; } // namespace KUNPENG_PMU -- Gitee From edaf1cfc519eb6b3ed2debc5ef0a68c75b506dae Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 12 Aug 2025 21:00:33 +0800 Subject: [PATCH 6/6] revise fd check --- pmu/bpf/evt_list_bpf.h | 6 +++--- pmu/evt_list.h | 4 +++- pmu/evt_list_default.h | 6 +++--- pmu/pmu_list.cpp | 9 ++++++--- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h index 2962892..b0d106f 100644 --- a/pmu/bpf/evt_list_bpf.h +++ b/pmu/bpf/evt_list_bpf.h @@ -51,9 +51,9 @@ public: int Start() override; int Stop() override; int Read(EventData &eventData) override; - int Enable(); - int Reset(); - int Close(); + 
int Enable() override; + int Reset() override; + int Close() override; void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, std::vector<PmuData>& pmuData); diff --git a/pmu/evt_list.h b/pmu/evt_list.h index bc05bdf..84b4c4f 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -37,7 +37,9 @@ public: virtual int Start() = 0; virtual int Stop() = 0; virtual int Read(EventData &eventData) = 0; - + virtual int Enable() = 0; + virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU diff --git a/pmu/evt_list_default.h b/pmu/evt_list_default.h index 446af55..f1fc946 100644 --- a/pmu/evt_list_default.h +++ b/pmu/evt_list_default.h @@ -65,11 +65,11 @@ public: } int Init(const bool groupEnable, const std::shared_ptr evtLeader); int Pause(); - int Close(); + int Close() override; int Start() override; - int Enable(); + int Enable() override; int Stop() override; - int Reset(); + int Reset() override; int Read(EventData &eventData) override; void SetGroupInfo(const EventGroupInfo &grpInfo); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 4ab72bb..5c30c3f 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -113,9 +113,12 @@ namespace KUNPENG_PMU { return symbolErrNo; } - auto err = CheckRlimit(fdNum); - if (err != SUCCESS) { - return err; + int err; + if (!taskParam->pmuEvt->useBpf) { // in bpf mode, cpuSize * procSize will exceed the rlimit + err = CheckRlimit(fdNum); + if (err != SUCCESS) { + return err; + } } err = Init(pd); -- Gitee
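
A few notes on the mechanisms this series relies on, with illustrative sketches; none of the code below is taken from the patches. The TriggeredRead helper ("enforce the bpf trace function") follows the bperf pattern from perf: before reading results, the raw_tp program is run once on each CPU so the per-cpu prev/accum readings are refreshed even if no sched_switch fired recently. A minimal sketch, assuming a libbpf version that provides bpf_prog_test_run_opts and a program fd taken from the generated skeleton:

    #include <bpf/bpf.h>        // bpf_prog_test_run_opts
    #include <linux/bpf.h>      // BPF_F_TEST_RUN_ON_CPU

    // Run the counting program once on `cpu` so it snapshots the counters
    // immediately, instead of waiting for the next real sched_switch.
    static int TriggerReadOnCpu(int progFd, int cpu)
    {
        struct bpf_prog_test_run_opts opts = {};
        opts.sz = sizeof(opts);              // libbpf opts structs carry their size
        opts.flags = BPF_F_TEST_RUN_ON_CPU;  // pin the test run to one CPU
        opts.cpu = cpu;
        return bpf_prog_test_run_opts(progFd, &opts);
    }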
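Registering a pid with the counting programs is two map updates, as InitPidForEvent does: zero the per-cpu accumulator under the pid's key, then record pid -> accum key in the filter map, so child threads that share the parent's key aggregate into the same profile. A hedged sketch against the sched_counter skeleton; the skeleton header name and MAX_CPU_NUM value are assumptions that must match the built bpf object:

    #include <bpf/libbpf.h>
    #include <linux/bpf.h>              // bpf_perf_event_value, BPF_NOEXIST
    #include <cstring>
    #include "sched_counter.skel.h"     // bpftool gen skeleton output (assumed name)

    #define MAX_CPU_NUM 1024            // illustrative; must cover all possible CPUs

    static int RegisterPid(struct sched_counter_bpf *obj, __u32 pid)
    {
        // Per-cpu accumulator starts at zero for this pid.
        static struct bpf_perf_event_value zeroed[MAX_CPU_NUM];
        std::memset(zeroed, 0, sizeof(zeroed));
        int err = bpf_map__update_elem(obj->maps.accum_readings, &pid, sizeof(pid),
                                       zeroed, sizeof(zeroed), BPF_NOEXIST);
        if (err) {
            return err;
        }
        // filter maps a scheduled-in tid to its accumulation key;
        // here the pid simply accumulates under itself.
        return bpf_map__update_elem(obj->maps.filter, &pid, sizeof(pid),
                                    &pid, sizeof(pid), BPF_NOEXIST);
    }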
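On the cgroup side, each opened counter fd is published in the shared events array at evtIdx * MAX_CPU_NUM + cpu, which is exactly the layout the bpf program reads back with key = idx * num_cpus + cpu. A sketch of that wiring with a plain perf_event_open syscall; the event type/config are placeholders, not the series' event mapping:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>
    #include <bpf/libbpf.h>
    #include <linux/bpf.h>

    #define MAX_CPU_NUM 1024   // illustrative; must match num_cpus in the bpf object

    // Open a counter on one cpu (pid = -1: every task on that cpu) and store
    // its fd under the (event index, cpu) slot of the perf-event array map.
    static int WireCounter(struct bpf_map *events, int evtIdx, int cpu)
    {
        struct perf_event_attr attr;
        std::memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;          // placeholder event
        attr.config = PERF_COUNT_HW_CPU_CYCLES;  // placeholder config
        attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;

        int fd = static_cast<int>(syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0));
        if (fd < 0) {
            return -1;
        }
        __u32 key = evtIdx * MAX_CPU_NUM + cpu;  // same layout the bpf side indexes
        return bpf_map__update_elem(events, &key, sizeof(key), &fd, sizeof(fd), BPF_ANY);
    }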
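ReadCgroupId resolves a cgroup directory to the kernel cgroup id through name_to_handle_at; that id is what the bpf side compares against the BPF_CORE_READ ancestor ids. One caveat worth flagging: the id is 64-bit, so returning it as int (as the moved function still does) can truncate large ids. A sketch with a 64-bit return, path handling simplified:

    #include <fcntl.h>      // name_to_handle_at, AT_FDCWD (g++ defines _GNU_SOURCE)
    #include <cstdint>
    #include <cstring>
    #include <string>

    // Map a cgroup directory to its kernel cgroup id (0 on failure).
    static uint64_t CgroupIdOf(const std::string &cgroupDir)
    {
        struct {
            struct file_handle fh;
            uint64_t cgroup_id;   // lands in the handle's flexible payload (GCC extension)
        } handle;
        std::memset(&handle, 0, sizeof(handle));
        int mountId = 0;

        handle.fh.handle_bytes = sizeof(handle.cgroup_id);
        if (name_to_handle_at(AT_FDCWD, cgroupDir.c_str(), &handle.fh, &mountId, 0) < 0) {
            return 0;
        }
        return handle.cgroup_id;
    }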
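When converting bpf_perf_event_value into per-cpu PmuData, note that running / enabled on the __u64 fields (as the countPercent assignment in patch 2 does) is integer division and collapses to 0 or 1. The conventional perf multiplexing correction, done in floating point, looks like this sketch:

    #include <cstdint>
    #include <linux/bpf.h>   // struct bpf_perf_event_value { counter, enabled, running }

    // Extrapolate the raw count over the full enabled window and report the
    // fraction of that window the event was actually scheduled on hardware.
    static uint64_t ScaledCount(const struct bpf_perf_event_value &v, double *runRatio)
    {
        if (v.running == 0 || v.enabled == 0) {
            if (runRatio) *runRatio = 0.0;
            return 0;
        }
        double ratio = static_cast<double>(v.running) / static_cast<double>(v.enabled);
        if (runRatio) *runRatio = ratio;
        return static_cast<uint64_t>(static_cast<double>(v.counter) / ratio);
    }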
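Finally, patches 3 and 6 make the bpf path opt-in per session rather than a build-time global: PmuAttr.useBpf selects EvtBpfList when the library was compiled with BPF_ENABLED, and the rlimit check is skipped in that mode because one fd per (event, cpu) replaces one per (cpu, pid). A caller-side sketch; pidList/numPid/numEvt/useBpf appear in the series, while evtList is an assumed companion field of include/pmu.h:

    #include "pmu.h"    // struct PmuAttr from include/pmu.h

    // Build a counting-mode attr that requests the bpf path.
    static PmuAttr MakeBpfCountingAttr(int *pids, unsigned numPid,
                                       char **evts, unsigned numEvt)
    {
        PmuAttr attr = {};
        attr.pidList = pids;
        attr.numPid = numPid;
        attr.evtList = evts;     // assumed field; the series shows numEvt alongside it
        attr.numEvt = numEvt;
        attr.useBpf = 1;         // bit added in patch 3; honored only when the
                                 // library itself was built with BPF_ENABLED
        return attr;
    }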