From 35fc6db5aa31aafe933a4ad1d3f580bca5ff2d63 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 18 Aug 2025 10:52:50 +0800 Subject: [PATCH] add bpf mode files && revise compile --- build/common.sh | 6 +- include/pcerrc.h | 2 + include/pmu.h | 2 + pmu/CMakeLists.txt | 11 +- pmu/bpf/evt_list_bpf.cpp | 166 ++++++ pmu/bpf/evt_list_bpf.h | 61 +++ pmu/bpf/perf_counter_bpf.cpp | 502 ++++++++++++++++++ pmu/bpf/perf_counter_bpf.h | 59 ++ pmu/bpf/sched_counter.bpf.c | 8 +- pmu/evt_list.h | 62 ++- pmu/{evt_list.cpp => evt_list_default.cpp} | 70 +-- pmu/evt_list_default.h | 60 +++ pmu/perf_counter.h | 53 +- ...f_counter.cpp => perf_counter_default.cpp} | 30 +- pmu/perf_counter_default.h | 76 +++ pmu/pmu.cpp | 48 +- pmu/pmu_event.h | 2 + pmu/pmu_list.cpp | 31 +- test/test_perf/CMakeLists.txt | 7 +- util/pcerr.cpp | 4 +- 20 files changed, 1105 insertions(+), 155 deletions(-) create mode 100644 pmu/bpf/evt_list_bpf.cpp create mode 100644 pmu/bpf/evt_list_bpf.h create mode 100644 pmu/bpf/perf_counter_bpf.cpp create mode 100644 pmu/bpf/perf_counter_bpf.h rename pmu/{evt_list.cpp => evt_list_default.cpp} (84%) create mode 100644 pmu/evt_list_default.h rename pmu/{perf_counter.cpp => perf_counter_default.cpp} (93%) create mode 100644 pmu/perf_counter_default.h diff --git a/build/common.sh b/build/common.sh index 0ce407c..c2330e1 100644 --- a/build/common.sh +++ b/build/common.sh @@ -102,8 +102,8 @@ function build_skel_files() { local bpf_file_dir=$1 local bpf_lib_dir=$2 - bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h" - if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then + bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/usr/include/bpf/vmlinux.h" + if [ -s "${bpf_lib_dir}local/bpf/usr/include/bpf/vmlinux.h" ]; then echo "The kernel header file generated." else echo "Generate vmlinux.h file failed." 
@@ -116,7 +116,7 @@ function build_skel_files() { skel_path="${bpf_file_dir}/${src_name}.skel.h" echo "compile: $src_name" - clang -I${bpf_lib_dir}local -g -O2 -target bpf -c "$bpf_src" -o "$obj_path" + clang -I${bpf_lib_dir}local/bpf/usr/include -g -O2 -target bpf -c "$bpf_src" -o "$obj_path" [ -s "$obj_path" ] || { echo "Error: The obj file was not generated."; exit 1; } bpftool gen skeleton "$obj_path" > "$skel_path" [ -s "$skel_path" ] || { echo "Error: The skeleton file was not generated."; exit 1; } diff --git a/include/pcerrc.h b/include/pcerrc.h index 9d5f4f6..b2a1fdf 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -123,6 +123,8 @@ extern "C" { #define LIBPERF_ERR_ALLOCATE_REGISTER_FAILED 1076 #define LIBPERF_ERR_CHECK_USER_ACCESS 1077 #define LIBPERF_ERR_COUNTER_INDEX_IS_ZERO 1078 +#define LIBPERF_ERR_BPF_ACT_FAILED 1079 +#define LIBPERF_ERR_INVALID_BPF_PARAM 1080 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index 43f1cc1..0361455 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -176,6 +176,8 @@ struct PmuAttr { // enable user access counting for current process unsigned enableUserAccess : 1; + // enable bpf mode for counting + unsigned enableBpf : 1; }; enum PmuTraceType { diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt index 4d7f7cf..d33bf40 100644 --- a/pmu/CMakeLists.txt +++ b/pmu/CMakeLists.txt @@ -36,16 +36,21 @@ if (BPF) message(STATUS "BPF is true. 
building with bpf submodule") add_compile_definitions(BPF_ENABLED) include_directories(${PMU_FILE_DIR}/bpf) - include_directories(${PROJECT_TOP_DIR}/third_party/libbpf) - link_directories(${PROJECT_TOP_DIR}/third_party/local/bpf) + include_directories(${PROJECT_TOP_DIR}/third_party/local/bpf/usr/include) + link_directories(${PROJECT_TOP_DIR}/third_party/local/bpf/usr/lib64) set(BPF_SOURCES ${BPF_SRC}) else () set(BPF_SOURCES "") endif() ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC} ${BPF_SOURCES}) ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC} ${BPF_SOURCES}) + set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf") -target_link_libraries(kperf sym) +if (BPF) + target_link_libraries(kperf sym bpf) +else () + target_link_libraries(kperf sym) +endif() target_compile_options(kperf PRIVATE -fPIC) install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) diff --git a/pmu/bpf/evt_list_bpf.cpp b/pmu/bpf/evt_list_bpf.cpp new file mode 100644 index 0000000..607ebf7 --- /dev/null +++ b/pmu/bpf/evt_list_bpf.cpp @@ -0,0 +1,166 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: implementations for managing and interacting with performance events of EvtListBpf in the KUNPENG_PMU namespace + ******************************************************************************/ +#include +#include +#include +#include "cpu_map.h" +#include "pmu_event.h" +#include "pcerrc.h" +#include "pcerr.h" +#include "log.h" +#include "common.h" +#include "evt_list_bpf.h" + +using namespace std; + +int KUNPENG_PMU::EvtListBpf::Init(const bool groupEnable, const std::shared_ptr evtLeader) +{ + // Init process map. + for (auto& proc: pidList) { + if (proc->tid > 0) { + procMap[proc->tid] = proc; + } + } + + for (unsigned int cpu = 0; cpu < numCpu; cpu++) { + PerfEvtPtr perfEvt = + std::make_shared(this->cpuList[cpu]->coreId, -1, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + int err = 0; + err = perfEvt->Init(groupEnable, -1, -1); + if (err != SUCCESS) { + return err; + } + this->cpuCounterArray.emplace_back(perfEvt); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + PerfEvtPtr perfEvt = + std::make_shared(-1, this->pidList[pid]->tid, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + perfEvt->Init(groupEnable, -1, -1); // init pid, ignore the result of perf_event_open + this->pidCounterArray.emplace_back(perfEvt); + } + return SUCCESS; +} + +int KUNPENG_PMU::EvtListBpf::CollectorTaskArrayDoTask(std::vector& taskArray, int task) +{ + std::unique_lock lock(mutex); + for (auto evt: taskArray) { + auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; + } + } + this->prevStat = this->evtStat; + this->evtStat = task; + return SUCCESS; +} + +int KUNPENG_PMU::EvtListBpf::Start() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, START); +} + +int KUNPENG_PMU::EvtListBpf::Enable() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, ENABLE); +} + +int KUNPENG_PMU::EvtListBpf::Stop() +{ + return 
CollectorTaskArrayDoTask(this->cpuCounterArray, STOP); +} + +int KUNPENG_PMU::EvtListBpf::Reset() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, RESET); +} + +int KUNPENG_PMU::EvtListBpf::Pause() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, PAUSE); +} + +int KUNPENG_PMU::EvtListBpf::Close() +{ + auto ret = CollectorTaskArrayDoTask(this->cpuCounterArray, CLOSE); + if (ret != SUCCESS) { + return ret; + } + + procMap.clear(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtListBpf::Read(EventData &eventData) +{ + std::unique_lock lg(mutex); + + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->BeginRead(); + if (err != SUCCESS) { + return err; + } + } + + struct PmuEvtData* head = nullptr; + int row = 0; + auto cpuTopo = this->cpuList[row].get(); + for (unsigned int pid = 0; pid < numPid; pid++) { + auto cnt = eventData.data.size(); + int err = this->pidCounterArray[pid]->Read(eventData); + if (err != SUCCESS) { + return err; + } + if (eventData.data.size() - cnt) { + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[pid]->pid, + cpuTopo->coreId, eventData.data.size() - cnt); + } + // Fill event name and cpu topology. 
+ FillFields(cnt, eventData.data.size(), cpuTopo, pidList[pid].get(), eventData.data); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->EndRead(); + if (err != SUCCESS) { + return err; + } + } + + return SUCCESS; +} + +void KUNPENG_PMU::EvtListBpf::FillFields( + size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +{ + for (auto i = start; i < end; ++i) { + data[i].cpuTopo = cpuTopo; + data[i].evt = this->pmuEvt->name.c_str(); + if (data[i].comm == nullptr) { + data[i].comm = procTopo->comm; + } + if (data[i].ts == 0) { + data[i].ts = this->ts; + } + } +} \ No newline at end of file diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h new file mode 100644 index 0000000..00afaad --- /dev/null +++ b/pmu/bpf/evt_list_bpf.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: declaration of class EvtListBpf with functions for managing and interacting with a list + * of performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_EVTLISTBPF_H +#define PMU_EVTLISTBPF_H +#include +#include +#include +#include +#include +#include +#include "cpu_map.h" +#include "perf_counter_bpf.h" +#include "perf_counter_default.h" +#include "pmu.h" +#include "process_map.h" +#include "sampler.h" +#include "spe_sampler.h" +#include "evt_list.h" + +namespace KUNPENG_PMU { + +class EvtListBpf : public EvtList { +public: + EvtListBpf(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, + std::shared_ptr pmuEvt, const int groupId) + : EvtList(symbolMode, cpuList, pidList, pmuEvt, groupId){} + + int Init(const bool groupEnable, const std::shared_ptr evtLeader); + int Pause(); + int Close() override; + int Start() override; + int Enable() override; + int Stop() override; + int Reset() override; + int Read(EventData &eventData) override; + + void SetGroupInfo(const EventGroupInfo &grpInfo) override {}; + void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) override {}; + +private: + std::vector> cpuCounterArray; + std::vector> pidCounterArray; + int CollectorTaskArrayDoTask(std::vector& taskArray, int task); + void FillFields(size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, std::vector& pmuData); +}; + +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp new file mode 100644 index 0000000..bfe2026 --- /dev/null +++ b/pmu/bpf/perf_counter_bpf.cpp @@ -0,0 +1,502 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Wu + * Create: 2025-08-10 + * Description: implementations for reading performance counters and initializing counting logic + * of PerfCounterBpf in the KUNPENG_PMU namespace. + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pmu.h" +#include "linked_list.h" +#include "pfm_event.h" +#include "pmu_event.h" +#include "pcerr.h" +#include "log.h" +#include "sched_counter.skel.h" +#include "sched_cgroup.skel.h" +#include "perf_counter_bpf.h" + +using namespace std; +using namespace pcerr; + +#define MAX_ENTITES 102400 + +static map counterMap; // key: evt name, value: bpf obj +static struct sched_cgroup_bpf *cgrpCounter = nullptr; // one bpf obj in cgroup mode +static std::unordered_map evtDataMap; +static set evtKeys; // updated fds of cgroup +static set readCgroups; +static set triggerdEvt; +static int evtIdx = 0; +static int cgrpProgFd = 0; + +static inline int TriggeredRead(int prog_fd, int cpu) +{ + // enforce the bpf trace function + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, // no input context + .ctx_size_in = 0, + .retval = 0, // return code of the BPF program + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + ); + return bpf_prog_test_run_opts(prog_fd, &opts); +} + +int KUNPENG_PMU::PerfCounterBpf::BeginRead() +{ + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::EndRead() +{ + triggerdEvt.clear(); + readCgroups.clear(); + 
return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::ReadBpfProcess(std::vector &data) +{ + const unsigned cpuNums = MAX_CPU_NUM; + auto obj = counterMap[this->evt->name]; + + // must execute sched_switch when each read operation. + // the pid may not have been scheduled for a long time and the pmu count will not be recoreded. + if (triggerdEvt.find(this->evt->name) == triggerdEvt.end()) { + for (int i = 0; i < cpuNums; i++) { + int triggerErr = TriggeredRead(evtDataMap[this->evt->name].bpfFd, i); + if (triggerErr) { + DBG_PRINT("trigger error: %s\n", strerror(-triggerErr)); + } + } + triggerdEvt.insert(this->evt->name); + } + + // read the pmu count of this pid in each cpu core + struct bpf_perf_event_value values[cpuNums]; + + int err = bpf_map__lookup_elem( + obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to lookup counter map accum_readings. Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + // convert pmu count to PmuData + int processId = 0; + auto findProc = procMap.find(this->pid); + if (findProc != procMap.end()) { + processId = findProc->second->pid; + } + + for (int i = 0; i < cpuNums; i++) { + data.emplace_back(PmuData{0}); + auto ¤t = data.back(); + current.count = values[i].counter; + current.countPercent = values[i].running / values[i].enabled; + current.cpu = i; + current.tid = this->pid; + current.pid = processId; + } + + // reset pmu count in bpf to ensure that the value read from pmu is delta (after last read/open) + memset(values, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value)); + err = bpf_map__update_elem( + obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map accum_readings. 
Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::ReadBpfCgroup(std::vector &data) +{ + auto cgrpName = this->evt->cgroupName; + if (readCgroups.find(cgrpName) != readCgroups.end()) { + return SUCCESS; + } + readCgroups.insert(cgrpName); + + for (int i=0;ievt->name].eventId; + int err = bpf_map__lookup_elem(cgrpCounter->maps.cgrp_readings, &readKey, sizeof(__u32), values, sizeof(values), BPF_ANY); + if (err) { + string msg = + "failed to lookup cgroup map cgrp_readings. Error: " + string(strerror(-err)) + " pid " + to_string(this->pid); + New(LIBPERF_ERR_BPF_ACT_FAILED, msg); + return SUCCESS; + } + + for (int i = 0; i < cpuNums; i++) { + data.emplace_back(PmuData{0}); + auto ¤t = data.back(); + current.count = values[i].counter; + current.countPercent = values[i].running / values[i].enabled; + current.cpu = i; + current.tid = this->pid; + current.cgroupName = this->evt->cgroupName.c_str(); + } + + memset(values, 0, cpuNums * sizeof(bpf_perf_event_value)); + err = bpf_map__update_elem(cgrpCounter->maps.cgrp_readings, &readKey, sizeof(__u32), values, sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup map cgrp_readings. 
Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Read(EventData &eventData) +{ + if (!evt->cgroupName.empty()) { + return ReadBpfCgroup(eventData.data); + } else { + return ReadBpfProcess(eventData.data); + } +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +int KUNPENG_PMU::PerfCounterBpf::InitPidForEvent() +{ + if (this->pid == -1) { + return SUCCESS; + } + + if (evtDataMap[this->evt->name].pids.find(this->pid) != evtDataMap[this->evt->name].pids.end()) { + return SUCCESS; + } + + auto findObj = counterMap.find(this->evt->name); + if (findObj == counterMap.end()) { + return -1; + } + + // initialize the cumulative pmu count for this pid + struct bpf_perf_event_value evtVal[MAX_CPU_NUM]; + + memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value)); + int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal, + sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map accum_readings. Error: " + err); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + // initialize the filter, build the map relationship of pid and accum_key + err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map filter. 
Error: " + err); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + DBG_PRINT("InitPidForEvent: %d\n", pid); + evtDataMap[this->evt->name].pids.insert(this->pid); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::InitBpfObj() +{ + int err; + struct sched_counter_bpf *obj; + auto findObj = counterMap.find(evt->name); + if (findObj == counterMap.end()) { + // initialize the bpf obj + obj = sched_counter_bpf__open(); + if (!obj) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to open counter bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.events, MAX_CPU_NUM); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: events"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.prev_readings, 1); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: prev_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.accum_readings, 1024); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: accum_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.filter, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: filter"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_counter_bpf__load(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to load counter bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_counter_bpf__attach(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to attach counter bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + counterMap[this->evt->name] = obj; + err = InitPidForEvent(); + if (err == LIBPERF_ERR_BPF_ACT_FAILED) { + return err; + } + // get the fd of bpf prog, trigger trace function(sched_switch) of bpf in read + int progFd = bpf_program__fd(obj->progs.on_switch); + + 
evtDataMap[this->evt->name].bpfFd = progFd; + DBG_PRINT("create bpf obj for evt %s prog fd %d\n", evt->name.c_str(), progFd); + } else { + obj = counterMap[this->evt->name]; + } + + // initialize the pmu count, put fd of pmu into value + err = bpf_map__update_elem(obj->maps.events, &this->cpu, sizeof(__u32), &this->fd, sizeof(int), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map events. Error: " + + string(strerror(-err)) + "cpu " + to_string(cpu) + "fd " + to_string(fd)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + evtDataMap[this->evt->name].cpus.insert(this->cpu); + return SUCCESS; +} + +static uint64_t ReadCgroupId(const string &cgroupName) +{ + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + std::string fullCgroupPath = "/sys/fs/cgroup/"; + int cgroupIsV2 = CheckCgroupV2(); + if (cgroupIsV2) { + fullCgroupPath += cgroupName; + } else if (cgroupIsV2 == 0) { + fullCgroupPath += "perf_event/" + cgroupName; + } + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { + return -1; + } + + return handle.cgroup_id; +} + +int KUNPENG_PMU::PerfCounterBpf::InitBpfCgroupObj() +{ + int err; + struct sched_cgroup_bpf *obj; + if (cgrpCounter == nullptr) { + obj = sched_cgroup_bpf__open(); + if(!obj){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to open cgroup bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + obj->rodata->num_cpus = MAX_CPU_NUM; + obj->rodata->num_events = this->evt->numEvent; + + err = bpf_map__set_max_entries(obj->maps.events, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: events"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = bpf_map__set_max_entries(obj->maps.prev_readings, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: 
prev_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = bpf_map__set_max_entries(obj->maps.cgrp_idx, MAX_ENTITES * 100); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: cgrp_idx"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = bpf_map__set_max_entries(obj->maps.cgrp_readings, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: cgrp_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_cgroup_bpf__load(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to load cgroup bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_cgroup_bpf__attach(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to attach cgroup bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + cgrpProgFd = bpf_program__fd(obj->progs.trigger_read); + cgrpCounter = obj; + DBG_PRINT("create bpf obj for cgroup evt %s \n", evt->name.c_str()); + } + + auto findEvtIdx = evtDataMap.find(this->evt->name); + if (findEvtIdx == evtDataMap.end()) { + evtDataMap[this->evt->name].eventId = evtIdx; + evtIdx++; + } + int evtKey = evtDataMap[this->evt->name].eventId * MAX_CPU_NUM + cpu; + if (evtKeys.find(evtKey) == evtKeys.end()) { + err = bpf_map__update_elem(cgrpCounter->maps.events, &evtKey, sizeof(__u32), + &this->fd, sizeof(int), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup map events. 
Error: " + + string(strerror(-err)) + "cpu " + to_string(cpu) + "fd " + to_string(fd)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + evtKeys.insert(evtKey); + } + + string cgrpName = this->evt->cgroupName; + auto findCgrp = cgroupIdxMap.find(cgrpName); + if (findCgrp == cgroupIdxMap.end()) { + uint64_t cgrpId = ReadCgroupId(cgrpName); + if (cgrpId == UINT64_MAX) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to get cgroup id of: " + cgrpName); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + int cgrpIdx = cgroupIdxMap.size(); + err = bpf_map__update_elem(cgrpCounter->maps.cgrp_idx, &cgrpId, sizeof(__u64), &cgrpIdx, sizeof(__u32), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup id: " + cgrpId); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + DBG_PRINT("init cgroup bpf map: %s id: %d\n", cgrpName.c_str(), cgrpId); + cgroupIdxMap[cgrpName] = cgrpIdx; + } + + evtDataMap[this->evt->name].cpus.insert(this->cpu); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +{ + int err = InitPidForEvent(); + if (err == LIBPERF_ERR_BPF_ACT_FAILED) { + return err; + } + auto findCpuMap = evtDataMap.find(this->evt->name); + auto findCgroup = cgroupIdxMap.find(this->evt->cgroupName); + if (findCpuMap != evtDataMap.end() && findCpuMap->second.cpus.count(this->cpu) && findCgroup != cgroupIdxMap.end()) { + return SUCCESS; + } + + if (findCpuMap == evtDataMap.end() || !findCpuMap->second.cpus.count(this->cpu)) { + err = this->MapPerfAttr(groupEnable, groupFd); + if (err != SUCCESS) { + return err; + } + } + + if (this->evt->cgroupName.empty()) { + err = InitBpfObj(); + } else { + err = InitBpfCgroupObj(); + } + return err; +} + +int KUNPENG_PMU::PerfCounterBpf::MapPerfAttr(const bool groupEnable, const int groupFd) +{ + struct perf_event_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = this->evt->type; + attr.config = this->evt->config; + 
attr.config1 = this->evt->config1; + attr.config2 = this->evt->config2; + attr.disabled = 1; + + // support cgroup feature + unsigned flags = 0; + if (this->GetCgroupFd() != -1) { + flags = PERF_FLAG_PID_CGROUP | PERF_FLAG_FD_CLOEXEC; + this->pid = this->GetCgroupFd(); + } + + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); + if (__glibc_unlikely(this->fd < 0)) { + return MapErrno(errno); + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Enable() +{ + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Disable() +{ + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounterBpf::Reset() +{ + return PerfEvt::Reset(); +} + +int KUNPENG_PMU::PerfCounterBpf::Close() +{ + if (this->fd > 0) { + close(this->fd); + } + return SUCCESS; +} \ No newline at end of file diff --git a/pmu/bpf/perf_counter_bpf.h b/pmu/bpf/perf_counter_bpf.h new file mode 100644 index 0000000..1cf1c23 --- /dev/null +++ b/pmu/bpf/perf_counter_bpf.h @@ -0,0 +1,59 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: declaration of class PerfCounterBpf that inherits from PerfCounter. + ******************************************************************************/ +#ifndef PMU_COUNTER_BPF_H +#define PMU_COUNTER_BPF_H + +#include +#include +#include +#include "evt.h" +#include "pmu_event.h" +#include "perf_counter.h" + +#define AT_FDCWD -100 + +struct BpfEvent { + int bpfFd = -1; + int eventId = -1; + std::set cpus; + std::set pids; +}; + +namespace KUNPENG_PMU { + class PerfCounterBpf : public PerfCounter { + public: + using PerfCounter::PerfCounter; + ~PerfCounterBpf() + {} + int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; + int Read(EventData &eventData) override; + int MapPerfAttr(const bool groupEnable, const int groupFd) override; + int Enable() override; + int Disable() override; + int Reset() override; + int Close() override; + + int BeginRead(); + int EndRead(); + private: + int InitBpfObj(); + int InitBpfCgroupObj(); + int InitPidForEvent(); + int ReadBpfProcess(std::vector &data); + int ReadBpfCgroup(std::vector &data); + std::map cgroupIdxMap; // key: cgroup name, value: sequential number + }; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c index 26e9ed1..f213676 100644 --- a/pmu/bpf/sched_counter.bpf.c +++ b/pmu/bpf/sched_counter.bpf.c @@ -56,7 +56,8 @@ struct { } filter SEC(".maps"); SEC("raw_tp/sched_switch") -int BPF_PROG(on_switch) { +int BPF_PROG(on_switch) +{ __u32 pid; __u32 zero=0; __u32 *accum_key; @@ -99,7 +100,8 @@ int BPF_PROG(on_switch) { } SEC("tp_btf/task_newtask") -int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){ +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ long err; __u32 new_pid; __u32 parent_pid; @@ -115,6 +117,6 @@ int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){ } bpf_map_update_elem(&filter, &new_pid, accum_key, 
BPF_NOEXIST); - bpf_printk("new pid: %d parent: %d add child: %ld accum_key: %ld\n", new_pid, parent_pid, new_pid, *accum_key); + bpf_printk("new pid: %d parent: %d accum_key: %ld\n", new_pid, parent_pid, *accum_key); return 0; } \ No newline at end of file diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 50cc522..4fb9de3 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -22,7 +22,7 @@ #include #include #include "cpu_map.h" -#include "perf_counter.h" +#include "perf_counter_default.h" #include "pmu.h" #include "process_map.h" #include "sampler.h" @@ -63,16 +63,15 @@ public: this->prevStat = OPEN; this->evtStat = OPEN; } - int Init(const bool groupEnable, const std::shared_ptr evtLeader); - int Pause(); - int Close(); - int Start(); - int Enable(); - int Stop(); - int Reset(); - int Read(EventData &eventData); - - void SetGroupInfo(const EventGroupInfo &grpInfo); + virtual ~EvtList() = default; + virtual int Init(const bool groupEnable, const std::shared_ptr evtLeader) = 0; + virtual int Pause() = 0; + virtual int Close() = 0; + virtual int Start() = 0; + virtual int Enable() = 0; + virtual int Stop() = 0; + virtual int Reset() = 0; + virtual int Read(EventData &eventData) = 0; void SetTimeStamp(const int64_t& timestamp) { @@ -109,37 +108,48 @@ public: return pmuEvt->blockedSample; } - void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader); - void ClearExitFd(); -private: - using PerfEvtPtr = std::shared_ptr; - - int CollectorDoTask(PerfEvtPtr collector, int task); - int CollectorXYArrayDoTask(std::vector>& xyArray, int task); - void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, - std::vector& pmuData); - void AdaptErrInfo(int err, PerfEvtPtr perfEvt); + virtual void SetGroupInfo(const EventGroupInfo &grpInfo) = 0; + virtual void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) = 0; +protected: + using PerfEvtPtr = std::shared_ptr; std::vector 
cpuList; std::vector pidList; std::vector unUsedPidList; std::set noProcList; std::shared_ptr pmuEvt; int groupId; // event group id - std::vector>> xyCounterArray; - std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); unsigned int numCpu = 0; unsigned int numPid = 0; std::set fdList; int64_t ts = 0; + std::vector>> xyCounterArray; std::unordered_map procMap; SymbolMode symMode = NO_SYMBOL_RESOLVE; unsigned long branchSampleFilter = KPERF_NO_BRANCH_SAMPLE; int prevStat; int evtStat; std::mutex mutex; - // Fixme: decouple group event with normal event, use different classes to implement Read and Init. - std::unique_ptr groupInfo = nullptr; + + int CollectorDoTask(PerfEvtPtr collector, int task) + { + switch (task) { + case START: + return collector->Start(); + case PAUSE: + return collector->Pause(); + case DISABLE: + return collector->Disable(); + case ENABLE: + return collector->Enable(); + case RESET: + return collector->Reset(); + case CLOSE: + return collector->Close(); + default: + return UNKNOWN_ERROR; + } + } }; struct EventGroupInfo { @@ -160,4 +170,4 @@ struct EventGroupInfo { using groupMapPtr = std::shared_ptr>; } // namespace KUNPENG_PMU -#endif +#endif \ No newline at end of file diff --git a/pmu/evt_list.cpp b/pmu/evt_list_default.cpp similarity index 84% rename from pmu/evt_list.cpp rename to pmu/evt_list_default.cpp index 7e93c0d..6f96e88 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list_default.cpp @@ -21,36 +21,11 @@ #include "pcerr.h" #include "log.h" #include "common.h" -#include "evt_list.h" +#include "evt_list_default.h" using namespace std; -int KUNPENG_PMU::EvtList::CollectorDoTask(PerfEvtPtr collector, int task) -{ - switch (task) { - case START: - return collector->Start(); - case PAUSE: - return collector->Pause(); - case DISABLE: - return collector->Disable(); - case ENABLE: - return collector->Enable(); - case RESET: - return collector->Reset(); - case CLOSE: { - auto ret = collector->Close(); - if (ret == SUCCESS) { - 
fdList.erase(collector->GetFd()); - } - return ret; - } - default: - return UNKNOWN_ERROR; - } -} - -int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector>& xyArray, int task) +int KUNPENG_PMU::EvtListDefault::CollectorXYArrayDoTask(std::vector>& xyArray, int task) { std::unique_lock lock(mutex); for (auto row: xyArray) { @@ -66,7 +41,7 @@ int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector evtLeader) +int KUNPENG_PMU::EvtListDefault::Init(const bool groupEnable, const std::shared_ptr evtLeader) { // Init process map. for (auto& proc: pidList) { @@ -127,7 +102,8 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrSetSymbolMode(symMode); perfEvt->SetBranchSampleFilter(branchSampleFilter); - int groupFd = groupEnable && evtLeader ? evtLeader->xyCounterArray[row][col]->GetFd():-1; + auto evtleaderDefault = std::dynamic_pointer_cast(evtLeader); + int groupFd = groupEnable && evtleaderDefault ? evtleaderDefault->xyCounterArray[row][col]->GetFd():-1; int err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); if (err == LIBPERF_ERR_NO_PERMISSION && !this->pmuEvt->excludeKernel && !this->pmuEvt->excludeUser && GetParanoidVal() > 1) { perfEvt->SetNeedTryExcludeKernel(true); @@ -157,22 +133,22 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrxyCounterArray, START); } -int KUNPENG_PMU::EvtList::Enable() +int KUNPENG_PMU::EvtListDefault::Enable() { return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); } -int KUNPENG_PMU::EvtList::Stop() +int KUNPENG_PMU::EvtListDefault::Stop() { return CollectorXYArrayDoTask(this->xyCounterArray, STOP); } -int KUNPENG_PMU::EvtList::Close() +int KUNPENG_PMU::EvtListDefault::Close() { auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); if (ret != SUCCESS) { @@ -183,20 +159,21 @@ int KUNPENG_PMU::EvtList::Close() return SUCCESS; } -int KUNPENG_PMU::EvtList::Reset() +int KUNPENG_PMU::EvtListDefault::Reset() { return 
CollectorXYArrayDoTask(this->xyCounterArray, RESET); } -void KUNPENG_PMU::EvtList::FillFields( - const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +void KUNPENG_PMU::EvtListDefault::FillFields( + size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) { for (auto i = start; i < end; ++i) { data[i].cpuTopo = cpuTopo; if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { // For group events, PmuData are all read by event leader, // and then some PmuData elements should be related to group members. - data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + std::shared_ptr child = std::dynamic_pointer_cast(groupInfo->evtGroupChildList[i-start-1]); + data[i].evt = child->pmuEvt->name.c_str(); } else { // For no group events or group leader. data[i].evt = this->pmuEvt->name.c_str(); @@ -211,7 +188,7 @@ void KUNPENG_PMU::EvtList::FillFields( } } -int KUNPENG_PMU::EvtList::Read(EventData &eventData) +int KUNPENG_PMU::EvtListDefault::Read(EventData &eventData) { std::unique_lock lg(mutex); @@ -256,16 +233,16 @@ int KUNPENG_PMU::EvtList::Read(EventData &eventData) return SUCCESS; } -int KUNPENG_PMU::EvtList::Pause() +int KUNPENG_PMU::EvtListDefault::Pause() { return CollectorXYArrayDoTask(this->xyCounterArray, PAUSE); } -std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) +std::shared_ptr KUNPENG_PMU::EvtListDefault::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) { switch (pmuEvent->collectType) { case (COUNTING): - return std::make_shared(cpu, pid, pmuEvent, procMap); + return std::make_shared(cpu, pid, pmuEvent, procMap); case (SAMPLING): return std::make_shared(cpu, pid, pmuEvent, procMap); case (SPE_SAMPLING): @@ -275,7 +252,7 @@ std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, }; } -void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) +void 
KUNPENG_PMU::EvtListDefault::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) { if (pid <= 0 || evtStat == CLOSE || evtStat == STOP) { return; @@ -300,7 +277,8 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons int err = 0; if (groupEnable) { int sz = this->pidList.size(); - auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + std::shared_ptr evtLeaderDefault = std::dynamic_pointer_cast(evtLeader); + auto groupFd = evtLeaderDefault?evtLeaderDefault->xyCounterArray[row][sz - 1]->GetFd():-1; err = perfEvt->Init(groupEnable, groupFd, -1); } else { err = perfEvt->Init(groupEnable, -1, -1); @@ -339,7 +317,7 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons } } -void KUNPENG_PMU::EvtList::ClearExitFd() +void KUNPENG_PMU::EvtListDefault::ClearExitFd() { if (this->pidList.size() == 1 && this->pidList[0]->tid == -1) { return; @@ -390,7 +368,7 @@ void KUNPENG_PMU::EvtList::ClearExitFd() noProcList.clear(); } -void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +void KUNPENG_PMU::EvtListDefault::SetGroupInfo(const EventGroupInfo &grpInfo) { this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); } \ No newline at end of file diff --git a/pmu/evt_list_default.h b/pmu/evt_list_default.h new file mode 100644 index 0000000..bc4d5d2 --- /dev/null +++ b/pmu/evt_list_default.h @@ -0,0 +1,60 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Zhang + * Create: 2024-04-03 + * Description: declaration of class EvtListDefault with functions for managing and interacting with a list + * of performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_EVTLISTDEFAULT_H +#define PMU_EVTLISTDEFAULT_H +#include +#include +#include +#include +#include +#include +#include "cpu_map.h" +#include "perf_counter_default.h" +#include "pmu.h" +#include "process_map.h" +#include "sampler.h" +#include "spe_sampler.h" +#include "evt_list.h" + +namespace KUNPENG_PMU { + +class EvtListDefault : public EvtList { +public: + EvtListDefault(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, + std::shared_ptr pmuEvt, const int groupId) + : EvtList(symbolMode, cpuList, pidList, pmuEvt, groupId){} + int Init(const bool groupEnable, const std::shared_ptr evtLeader); + int Pause(); + int Close() override; + int Start() override; + int Enable() override; + int Stop() override; + int Reset() override; + int Read(EventData &eventData) override; + + void SetGroupInfo(const EventGroupInfo &grpInfo) override; + void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) override; + void ClearExitFd(); +private: + int CollectorXYArrayDoTask(std::vector>& xyArray, int task); + void FillFields(size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, std::vector& pmuData); + void AdaptErrInfo(int err, PerfEvtPtr perfEvt); + std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); + // Fixme: decouple group event with normal event, use 
different classes to implement Read and Init. + std::unique_ptr groupInfo = nullptr; +}; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index cdb6ddd..fe97363 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -23,55 +23,18 @@ #include "evt.h" #include "pmu_event.h" -#define REQUEST_USER_ACCESS 0x2 - -struct ReadFormat { - __u64 value; - __u64 timeEnabled; - __u64 timeRunning; - __u64 id; -}; - namespace KUNPENG_PMU { - static constexpr int COUNT_PAGE_SIZE = 4096; class PerfCounter : public PerfEvt { public: using PerfEvt::PerfEvt; - ~PerfCounter() - {} - int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; - int Read(EventData &eventData) override; - int MapPerfAttr(const bool groupEnable, const int groupFd) override; - int Enable() override; - int Disable() override; - int Reset() override; - int Close() override; - - private: - enum class GroupStatus - { - NO_GROUP, - GROUP_LEADER, - GROUP_MEMBER - }; - int Mmap(); - int MapPerfAttrUserAccess(); - int CountValueToData(const __u64 value, const __u64 timeEnabled, - const __u64 timeRunning, __u64 &accumCount, std::vector &data); - int ReadSingleEvent(std::vector &data); - int ReadGroupEvents(std::vector &data); - - // Accumulated pmu count, time enabled and time running. - __u64 enabled = 0; - __u64 running = 0; - // For group events, is the accum counts of all members. - // For normal events, has only one element. 
- std::vector<__u64> accumCount; - int groupFd = 0; - GroupStatus groupStatus = GroupStatus::NO_GROUP; - // reg index is stored in countMmap->base - std::shared_ptr countMmap = nullptr; - bool isCollect{false}; + virtual ~PerfCounter() = default; + virtual int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) = 0; + virtual int Read(EventData &eventData) = 0; + virtual int MapPerfAttr(const bool groupEnable, const int groupFd) =0; + virtual int Enable() = 0; + virtual int Disable() = 0; + virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter_default.cpp similarity index 93% rename from pmu/perf_counter.cpp rename to pmu/perf_counter_default.cpp index e3640ff..1ced311 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter_default.cpp @@ -10,8 +10,8 @@ * See the Mulan PSL v2 for more details. * Author: Mr.Gan * Create: 2024-04-03 - * Description: implementations for reading performance counters and initializing counting logic in - * the KUNPENG_PMU namespace. + * Description: implementations for reading performance counters and initializing counting logic + * of PerfCounterDefault in the KUNPENG_PMU namespace. 
******************************************************************************/ #include #include @@ -29,7 +29,7 @@ #include "pmu_event.h" #include "pcerr.h" #include "log.h" -#include "perf_counter.h" +#include "perf_counter_default.h" #include "read_reg.h" #include "common.h" @@ -53,7 +53,7 @@ struct GroupReadFormat { * Right now we do not implement grouping logic, thus we ignore the * PERF_FORMAT_ID section for now */ -int KUNPENG_PMU::PerfCounter::Read(EventData &eventData) +int KUNPENG_PMU::PerfCounterDefault::Read(EventData &eventData) { if (__glibc_unlikely(this->fd < 0)) { this->accumCount.clear(); @@ -141,7 +141,7 @@ static int PerfMmapReadSelf(const std::shared_ptr &countMmap, struct R } } // namespace KUNPENG_PMU -int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +int KUNPENG_PMU::PerfCounterDefault::ReadSingleEvent(std::vector &data) { ReadFormat perfCountValue; if (this->evt->enableUserAccess) { @@ -178,7 +178,7 @@ int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) return SUCCESS; } -int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +int KUNPENG_PMU::PerfCounterDefault::ReadGroupEvents(std::vector &data) { // Fixme: // In current class, we do not know how many events in group. 
@@ -219,7 +219,7 @@ int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) return SUCCESS; } -int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 timeEnabled, +int KUNPENG_PMU::PerfCounterDefault::CountValueToData(const __u64 value, const __u64 timeEnabled, const __u64 timeRunning, __u64 &accumCount, vector &data) { if (value < accumCount || timeEnabled < enabled || timeRunning < running) { @@ -262,7 +262,7 @@ int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 ti /** * Initialize counting */ -int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +int KUNPENG_PMU::PerfCounterDefault::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) { int err = SUCCESS; if (this->evt->enableUserAccess) { // user access @@ -277,7 +277,7 @@ int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, co return err; } -int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) +int KUNPENG_PMU::PerfCounterDefault::MapPerfAttr(const bool groupEnable, const int groupFd) { /** * For now, we only implemented the logic for CORE type events. 
Support for UNCORE PMU events will be @@ -352,7 +352,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou return SUCCESS; } -int KUNPENG_PMU::PerfCounter::MapPerfAttrUserAccess() +int KUNPENG_PMU::PerfCounterDefault::MapPerfAttrUserAccess() { struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); @@ -375,7 +375,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttrUserAccess() return SUCCESS; } -int KUNPENG_PMU::PerfCounter::Mmap() +int KUNPENG_PMU::PerfCounterDefault::Mmap() { this->countMmap = std::make_shared(); this->countMmap->prev = 0; @@ -395,7 +395,7 @@ int KUNPENG_PMU::PerfCounter::Mmap() /** * Enable */ -int KUNPENG_PMU::PerfCounter::Enable() +int KUNPENG_PMU::PerfCounterDefault::Enable() { if (groupFd != -1) { // Only group leader should use ioctl to enable, disable or reset, @@ -416,7 +416,7 @@ int KUNPENG_PMU::PerfCounter::Enable() return SUCCESS; } -int KUNPENG_PMU::PerfCounter::Disable() +int KUNPENG_PMU::PerfCounterDefault::Disable() { if (groupFd != -1) { return SUCCESS; @@ -428,7 +428,7 @@ int KUNPENG_PMU::PerfCounter::Disable() return err; } -int KUNPENG_PMU::PerfCounter::Reset() +int KUNPENG_PMU::PerfCounterDefault::Reset() { if (groupFd != -1) { return SUCCESS; @@ -436,7 +436,7 @@ int KUNPENG_PMU::PerfCounter::Reset() return PerfEvt::Reset(); } -int KUNPENG_PMU::PerfCounter::Close() +int KUNPENG_PMU::PerfCounterDefault::Close() { if (this->countMmap && this->countMmap->base && this->countMmap->base != MAP_FAILED) { munmap(this->countMmap->base, COUNT_PAGE_SIZE); diff --git a/pmu/perf_counter_default.h b/pmu/perf_counter_default.h new file mode 100644 index 0000000..1a84dd7 --- /dev/null +++ b/pmu/perf_counter_default.h @@ -0,0 +1,76 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Gan + * Create: 2024-04-03 + * Description: declaration of class PerfCounterDefault that inherits from PerfCounter. + ******************************************************************************/ +#ifndef PMU_COUNTER_DEFAULT_H +#define PMU_COUNTER_DEFAULT_H + +#include +#include +#include +#include "evt.h" +#include "pmu_event.h" +#include "perf_counter.h" + +#define REQUEST_USER_ACCESS 0x2 + +struct ReadFormat { + __u64 value; + __u64 timeEnabled; + __u64 timeRunning; + __u64 id; +}; + +namespace KUNPENG_PMU { + static constexpr int COUNT_PAGE_SIZE = 4096; + class PerfCounterDefault : public PerfCounter { + public: + using PerfCounter::PerfCounter; + ~PerfCounterDefault() + {} + int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; + int Read(EventData &eventData) override; + int MapPerfAttr(const bool groupEnable, const int groupFd) override; + int Enable() override; + int Disable() override; + int Reset() override; + int Close() override; + + private: + enum class GroupStatus + { + NO_GROUP, + GROUP_LEADER, + GROUP_MEMBER + }; + int Mmap(); + int MapPerfAttrUserAccess(); + int CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, std::vector &data); + int ReadSingleEvent(std::vector &data); + int ReadGroupEvents(std::vector &data); + + // Accumulated pmu count, time enabled and time running. + __u64 enabled = 0; + __u64 running = 0; + // For group events, is the accum counts of all members. + // For normal events, has only one element. 
+ std::vector<__u64> accumCount; + int groupFd = 0; + GroupStatus groupStatus = GroupStatus::NO_GROUP; + // reg index is stored in countMmap->base + std::shared_ptr countMmap = nullptr; + bool isCollect{false}; + }; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 2c8ef8b..5dfb9d5 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -293,6 +293,30 @@ static int CheckUserAccess(enum PmuTaskType collectType, struct PmuAttr *attr) return SUCCESS; } +static int CheckBpfMode(enum PmuTaskType collectType, struct PmuAttr *attr) +{ + if (!attr->enableBpf) { + return SUCCESS; + } + #ifndef BPF_ENABLED + New(LIBPERF_ERR_INVALID_BPF_PARAM, "No compilation of 'bpf=true' to support bpf mode"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + #endif + if (collectType != COUNTING) { + New(LIBPERF_ERR_INVALID_BPF_PARAM, "Bpf mode only support counting"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + } + if (attr->cgroupNameList == nullptr && attr->pidList == nullptr) { + New(LIBPERF_ERR_INVALID_BPF_PARAM, "Bpf mode need collect pid or cgroup"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + } + if (attr->evtAttr != nullptr) { + New(LIBPERF_ERR_INVALID_BPF_PARAM, "Bpf mode doesn't support event group now"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + } + return SUCCESS; +} + static int CheckAttr(enum PmuTaskType collectType, struct PmuAttr *attr) { auto err = CheckUserAccess(collectType, attr); @@ -338,6 +362,11 @@ static int CheckAttr(enum PmuTaskType collectType, struct PmuAttr *attr) return err; } + err = CheckBpfMode(collectType, attr); + if (err != SUCCESS) { + New(err); + return err; + } return SUCCESS; } @@ -907,10 +936,19 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { - // For counting with pid list for system wide, open fd with cpu -1 and specific pid. 
- taskParam->numCpu = 1; - taskParam->cpuList = new int[taskParam->numCpu]; - taskParam->cpuList[0] = -1; + if(attr->enableBpf) { + // collect data from all system cores in bpf mode + taskParam->numCpu = MAX_CPU_NUM; + taskParam->cpuList = new int[MAX_CPU_NUM]; + for(int i = 0; i < MAX_CPU_NUM; i++) { + taskParam->cpuList[i] = i; + } + } else { + // For counting with pid list for system wide, open fd with cpu -1 and specific pid. + taskParam->numCpu = 1; + taskParam->cpuList = new int[taskParam->numCpu]; + taskParam->cpuList[0] = -1; + } } else if (attr->cpuList == nullptr) { // For null cpulist, open fd with cpu 0,1,2...max_cpu const set &onLineCpus = GetOnLineCpuIds(); @@ -997,6 +1035,8 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att if (attr->enableUserAccess) { taskParam->pmuEvt->config1 = REQUEST_USER_ACCESS; } + taskParam->pmuEvt->numEvent = attr->numEvt; + taskParam->pmuEvt->enableBpf = attr->enableBpf; return taskParam.release(); } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 6a7f8cb..d8cdd63 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -52,6 +52,8 @@ struct PmuEvt { int cgroupFd; std::string cgroupName; unsigned enableUserAccess : 1; // avoid uncore (config1 & 0x2) == 0x2 + unsigned numEvent; // pmu event number for bpf cgroup init + unsigned enableBpf : 1; // enable bpf mode in counting mode }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 31b131b..af5cec1 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -28,6 +28,10 @@ #include "pmu_event_list.h" #include "pmu_list.h" #include "pfm_event.h" +#include "evt_list_default.h" +#ifdef BPF_ENABLED + #include "bpf/evt_list_bpf.h" +#endif using namespace std; using namespace pcerr; @@ -87,10 +91,19 @@ namespace KUNPENG_PMU { return err; } fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); - std::shared_ptr evtList = - std::make_shared(GetSymbolMode(pd), cpuTopoList, 
procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId);
-        evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd));
-        InsertEvtList(pd, evtList);
+        #ifdef BPF_ENABLED
+        if (taskParam->pmuEvt->enableBpf) {
+            std::shared_ptr evtList =
+                std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId);
+            InsertEvtList(pd, evtList);
+        } else
+        #endif
+        {
+            std::shared_ptr evtList =
+                std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId);
+            evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd));
+            InsertEvtList(pd, evtList);
+        }
         pmuTaskAttrHead = pmuTaskAttrHead->next;
     }
@@ -100,9 +113,12 @@
         return symbolErrNo;
     }
-    auto err = CheckRlimit(fdNum);
-    if (err != SUCCESS) {
-        return err;
+    int err;
+    if (!taskParam->pmuEvt->enableBpf) { // in bpf mode, cpuSize * procSize will exceed rlimit
+        err = CheckRlimit(fdNum);
+        if (err != SUCCESS) {
+            return err;
+        }
     }
     err = Init(pd);
@@ -739,7 +755,6 @@
     }
     auto& evData = dataList[pd];
-
     if (GetTaskType(pd) == COUNTING) {
         std::vector newPmuData;
         AggregateUncoreData(pd, evData.data, newPmuData);
diff --git a/test/test_perf/CMakeLists.txt b/test/test_perf/CMakeLists.txt
index 0978ab6..21c3677 100644
--- a/test/test_perf/CMakeLists.txt
+++ b/test/test_perf/CMakeLists.txt
@@ -12,6 +12,11 @@ add_compile_options(-g)
 set(CMAKE_CXX_STANDARD 14)
 aux_source_directory(.
SOURCE_SRC) add_executable(test_perf ${SOURCE_SRC} ${CMAKE_CURRENT_LIST_DIR}/../../util/pcerr.cpp)
-target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread -g)
+
+set(COMMON_LIBS sym kperf gtest m gtest_main elf_static dwarf_static pthread -g)
+if (BPF)
+    set(BPF_LIBS bpf)
+endif()
+target_link_libraries(test_perf ${COMMON_LIBS} ${BPF_LIBS})
 add_subdirectory(case)
diff --git a/util/pcerr.cpp b/util/pcerr.cpp
index eba1369..6d9c53c 100644
--- a/util/pcerr.cpp
+++ b/util/pcerr.cpp
@@ -64,7 +64,9 @@ namespace pcerr {
         {LIBPERF_ERR_COUNT_MMAP_IS_NULL, "Count mmap page is null!"},
         {LIBPERF_ERR_ENABLE_USER_ACCESS_FAILED, "Enable user access failed!"},
         {LIBPERF_ERR_ALLOCATE_REGISTER_FAILED, "Allocate register failed!"},
-        {LIBPERF_ERR_CHECK_USER_ACCESS, "Check user access failed!"}
+        {LIBPERF_ERR_CHECK_USER_ACCESS, "Check user access failed!"},
+        {LIBPERF_ERR_INVALID_BPF_PARAM, "Check bpf mode failed!"},
+        {LIBPERF_ERR_BPF_ACT_FAILED, "Failed to execute bpf obj action!"}
     };
     static std::unordered_map warnMsgs = {
         {LIBPERF_WARN_CTXID_LOST, "Some SPE context packets are not found in the traces."},
-- Gitee