diff --git a/.gitmodules b/.gitmodules index e0fc23552c477cf77a1ae80900bf9ec489584f6e..549774f3487d77df3730391e8a23d037229df3ed 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "googletest"] path = third_party/googletest url = https://gitee.com/mirrors/googletest.git +[submodule "third_party/libbpf"] + path = third_party/libbpf + url = https://gitee.com/mirrors/libbpf.git diff --git a/CMakeLists.txt b/CMakeLists.txt index f013847e946f13209dc6488db54862bd1f5294e6..95ced186753cd904c647c4c63b7dea56b8535e7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,4 +60,7 @@ if (INCLUDE_TEST) add_subdirectory(test) endif() +option(BPF "Enable BPF mode" OFF) +message(STATUS "BPF support: ${BPF}") + set(CMAKE_EXPORT_COMPILE_COMMANDS True) diff --git a/build.sh b/build.sh index 6fb130037dc5f0a97ba6ee0baa647336d08da515..079cef3cce1827a50f81f3bda3c62a00342aae5b 100644 --- a/build.sh +++ b/build.sh @@ -20,6 +20,7 @@ PROJECT_DIR=$(realpath "${CURRENT_DIR}") BUILD_DIR=${PROJECT_DIR}/_build THIRD_PARTY=${PROJECT_DIR}/third_party/ INSTALL_PATH=${PROJECT_DIR}/output/ +BPF_DIR=${PROJECT_DIR}/pmu/bpf BUILD_TYPE=Release # Python module are not compiled by default. PYTHON=false @@ -27,6 +28,8 @@ PYTHON=false INCLUDE_TEST=false # Go support, copy so and head files GO=false +# Bpf mode for counting +BPF=false source ${PROJECT_DIR}/build/common.sh @@ -66,6 +69,9 @@ for arg in "$@"; do go=*) GO="${arg#*=}" ;; + bpf=*) + BPF="${arg#*=}" + ;; esac done @@ -73,6 +79,11 @@ if [[ "$INCLUDE_TEST" == "true" ]]; then build_googletest $THIRD_PARTY fi +if [[ "$BPF" == "true" ]]; then + build_libbpf $THIRD_PARTY + build_skel_files $BPF_DIR $THIRD_PARTY +fi + function build_elfin() { local cmake_target_dir=$THIRD_PARTY/local/elfin-parser rm -rf ${cmake_target_dir} @@ -110,6 +121,7 @@ build_libkperf() "-DGO=${GO}" "-DCMAKE_INSTALL_PREFIX=${INSTALL_PATH}" "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}" + "-DBPF=${BPF}" ) if [ ! 
-z ${PYTHON_EXE} ];then CMAKE_ARGS+=("-DPYTHON_KPERF=${PYTHON_EXE}") diff --git a/build/common.sh b/build/common.sh index f48b64e7ad234c9bc0665c4acdda5a55ad6c15cc..4f725fc8acc62c1e8c2e2ad1a0d6a83e40d8d8b8 100644 --- a/build/common.sh +++ b/build/common.sh @@ -13,6 +13,10 @@ # Description: Partial methods for building scripts. set -e +export BPF_CLANG="clang" +export BPF_TOOL="bpftool" +export CLANG_FLAGS="-O2 -g -target bpf" + cpu_core_num=$(($(nproc)-1)) if [ "$cpu_core_num" -eq 0 ];then @@ -76,4 +80,48 @@ function execute_binary() { echo "执行命令: $command" eval "$command" done -} \ No newline at end of file +} + +function build_libbpf() { + local open_source_dir=$1 + local cmake_target_dir=$1/bpf + if [ -d "${cmake_target_dir}" ];then + echo ${cmake_target_dir} "is exist" + return + else + echo ${cmake_target_dir} "is not exist" + fi + pushd "$open_source_dir/libbpf/src" + make -j ${cpu_core_num} + make install DESTDIR=$open_source_dir/local/bpf + echo "install log path: $cmake_target_dir" +} + +function build_skel_files() { + command -v $BPF_CLANG &> /dev/null || error_exit "Error: $BPF_CLANG not found. Please install LLVM/Clang." + command -v $BPF_TOOL &> /dev/null || error_exit "Error: $BPF_TOOL not found. Please install bpftool." + + local bpf_file_dir=$1 + local bpf_lib_dir=$2 + bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h" + if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then + echo "The kernel header file generated." + else + echo "Generate vmlinux.h file failed." 
+ fi + + for bpf_src in "${bpf_file_dir}"/*.bpf.c; do + [ -f "$bpf_src" ] || continue + src_name=$(basename "${bpf_src%.bpf.c}") + obj_path="${bpf_file_dir}/${src_name}.bpf.o" + skel_path="${bpf_file_dir}/${src_name}.skel.h" + + echo "compile: $src_name" + clang -I${bpf_lib_dir}local -g -O2 -target bpf -c "$bpf_src" -o "$obj_path" + [ -s "$obj_path" ] || { echo "Error: The obj file was not generated."; exit 1; } + bpftool gen skeleton "$obj_path" > "$skel_path" + [ -s "$skel_path" ] || { echo "Error: The skeleton file was not generated."; exit 1; } + grep -q 'struct bpf_prog' "$skel_path" || { echo "Error: invalid skeleton format."; exit 1; } + echo "generate: ${src_name}.skel.h" + done +} diff --git a/example/pmu_bpf_counting.cpp b/example/pmu_bpf_counting.cpp new file mode 100644 index 0000000000000000000000000000000000000000..78307d38dac62808e81d10a56dced02431dff043 --- /dev/null +++ b/example/pmu_bpf_counting.cpp @@ -0,0 +1,128 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Wu + * Create: 2025-08-05 + * Description: Pmu data hotspot analysis module. 
+ * Current capability: launch a process or pid and begin to collect data in counting mode + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include "pcerrc.h" +#include "pmu.h" +#include "symbol.h" + +using namespace std; + +#define RED_TEXT "\033[31m" +#define RESET_COLOR "\033[0m" + +const char* UNKNOWN = "UNKNOWN"; +const int HEX_BUFFER_SIZE = 20; +const int FLOAT_PRECISION = 2; +uint64_t g_totalPeriod = 0; + +void bpfCollecting(int pid, double interval) +{ + char* evtList[5]; + evtList[0] = (char*)"cycles"; + evtList[1] = (char*)"br_mis_pred"; + evtList[2] = (char*)"br_retired"; + evtList[3] = (char*)"br_return_retired"; + evtList[4] = (char*)"br_mis_pred_retired"; + struct PmuAttr attr = {0}; + + attr.evtList = evtList; + attr.numEvt = 5; + attr.pidList = &pid; + attr.numPid = 1; + attr.cpuList = nullptr; + + int pd = PmuOpen(COUNTING, &attr); + if (pd == -1) { + std::cerr << "PmuOpen failed" << std::endl; + std::cerr << "error msg:" << Perror() << std::endl; + return; + } + + PmuEnable(pd); + sleep(interval); + PmuData* pmuData = nullptr; + int len = PmuRead(pd, &pmuData); + if (len == -1) { + std::cerr << "error msg:" << Perror() << std::endl; + return; + } + for (int i = 0; i < len; ++i) { + printf("evt=%s, pid=%d, tid=%d, cpu=%d, count=%ld\n", pmuData[i].evt, pmuData[i].pid, pmuData[i].tid, pmuData[i].cpu, pmuData[i].count); + } + PmuDataFree(pmuData); + PmuDisable(pd); + PmuClose(pd); + return; +} + +bool ExistPath(const std::string &filePath) { + struct stat statbuf{}; + return stat(filePath.c_str(), &statbuf) == 0; +} + +void print_usage() { + std::cerr << "Usage: pmu_bpf_counting interval \n"; + std::cerr << " process name: process path or input process number\n"; + std::cerr << " example: pmu_bpf_counting 10./process\n"; + std::cerr << " example: pmu_bpf_counting 20 \n"; +} + +int main(int argc, char** argv) +{ + if (argc < 2) { + 
print_usage(); + return 0; + } + pid_t pid = 0; + double interval = 0.0; + bool needKill = false; + try { + interval = std::stod(argv[1]); + if (interval <= 0) { + throw std::invalid_argument("Interval must be a positive number."); + } + + try { + pid = std::stoi(argv[2]); + } catch (const std::invalid_argument&) { + string process = argv[2]; + if (!ExistPath(process)) { + throw std::invalid_argument("process name not a exec file."); + } + + pid = fork(); + if (pid == 0) { + execvp(argv[2], &argv[2]); + perror("execvp failed"); + _exit(1); + } + } + } catch (const std::exception& e) { + std::cerr << "Error parsing arguments: " << e.what() << "\n"; + print_usage(); + return EXIT_FAILURE; + } + std::cout<<"pid: "< +#include +#include +#include "cpu_map.h" +#include "pmu_event.h" +#include "pcerrc.h" +#include "pcerr.h" +#include "log.h" +#include "common.h" +#include "evt_list_bpf.h" + +using namespace std; + +int KUNPENG_PMU::EvtBpfList::CollectorDoTask(PerfEvtPtr collector, int task) +{ + switch (task) { + case START: + return collector->Start(); + case PAUSE: + return collector->Pause(); + case DISABLE: + return collector->Disable(); + case ENABLE: + return collector->Enable(); + case RESET: + return collector->Reset(); + case CLOSE: { + return collector->Close(); + } + default: + return UNKNOWN_ERROR; + } +} + +int KUNPENG_PMU::EvtBpfList::Init(const bool groupEnable, const std::shared_ptr evtLeader) +{ + // Init process map. 
+ for (auto& proc: pidList) { + if (proc->tid > 0) { + procMap[proc->tid] = proc; + } + } + + for (unsigned int cpu = 0; cpu < numCpu; cpu++) { + PerfEvtPtr perfEvt; + perfEvt = std::make_shared(this->cpuList[cpu]->coreId, -1, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + int err = 0; + err = perfEvt->Init(groupEnable, -1, -1); + this->cpuCounterArray.emplace_back(perfEvt); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + PerfEvtPtr perfEvt; + perfEvt = std::make_shared(-1, this->pidList[pid]->tid, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + int err = 0; + err = perfEvt->Init(groupEnable, -1, -1); + this->pidCounterArray.emplace_back(perfEvt); + } + return SUCCESS; +} + +int KUNPENG_PMU::EvtBpfList::CollectorTaskArrayDoTask(std::vector& taskArray, int task) +{ + std::unique_lock lock(mutex); + for (auto evt: taskArray) { + auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; + } + } + this->prevStat = this->evtStat; + this->evtStat = task; + return SUCCESS; +} + +int KUNPENG_PMU::EvtBpfList::Start() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, START); +} + +int KUNPENG_PMU::EvtBpfList::Enable() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, ENABLE); +} + +int KUNPENG_PMU::EvtBpfList::Stop() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, STOP); +} + +int KUNPENG_PMU::EvtBpfList::Close() +{ + auto ret = CollectorTaskArrayDoTask(this->cpuCounterArray, CLOSE); + if (ret != SUCCESS) { + return ret; + } + + procMap.clear(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtBpfList::Read(EventData &eventData) +{ + std::unique_lock lg(mutex); + + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->BeginRead(); + if (err != SUCCESS) { + return err; + } + } + + struct PmuEvtData* head = nullptr; + int row = 0; + auto cpuTopo = this->cpuList[row].get(); + for (unsigned int pid = 0; pid < numPid; pid++) { 
+ auto cnt = eventData.data.size(); + int err = this->pidCounterArray[pid]->Read(eventData); + if (err != SUCCESS) { + return err; + } + if (eventData.data.size() - cnt) { + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[pid]->pid, + cpuTopo->coreId, eventData.data.size() - cnt); + } + // Fill event name and cpu topology. + FillFields(cnt, eventData.data.size(), cpuTopo, pidList[pid].get(), eventData.data); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->EndRead(); + if (err != SUCCESS) { + return err; + } + } + + return SUCCESS; +} + +void KUNPENG_PMU::EvtBpfList::FillFields( + const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +{ + for (auto i = start; i < end; ++i) { + data[i].cpuTopo = cpuTopo; + data[i].evt = this->pmuEvt->name.c_str(); + //data[i].groupId = this->groupId; + if (data[i].comm == nullptr) { + data[i].comm = procTopo->comm; + } + if (data[i].ts == 0) { + data[i].ts = this->ts; + } + } +} \ No newline at end of file diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h new file mode 100644 index 0000000000000000000000000000000000000000..b0d106f4f9babf3e3e7fd3694bf28caf89d4389b --- /dev/null +++ b/pmu/bpf/evt_list_bpf.h @@ -0,0 +1,90 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: declaration of class EvtBpfList with functions for managing and interacting with a list + * of performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_EVTBPFLIST_H +#define PMU_EVTBPFLIST_H +#include +#include +#include +#include +#include +#include +#include "cpu_map.h" +#include "perf_counter_bpf.h" +#include "perf_counter_default.h" +#include "pmu.h" +#include "process_map.h" +#include "sampler.h" +#include "spe_sampler.h" +#include "evt_list.h" +#include "evt_list_default.h" + +namespace KUNPENG_PMU { + +class EvtBpfList : public EvtList { +public: + using ProcPtr = std::shared_ptr; + using CpuPtr = std::shared_ptr; + EvtBpfList(std::vector &cpuList, std::vector &pidList, + std::shared_ptr pmuEvt) + : cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt) + { + this->numCpu = this->cpuList.size(); + this->numPid = this->pidList.size(); + this->prevStat = OPEN; + this->evtStat = OPEN; + } + + int Init(const bool groupEnable, const std::shared_ptr evtLeader); + int Start() override; + int Stop() override; + int Read(EventData &eventData) override; + int Enable() override; + int Reset() override; + int Close() override; + void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, + std::vector& pmuData); + + void SetTimeStamp(const int64_t& timestamp) + { + this->ts = timestamp; + } + + int GetEvtType() const + { + return pmuEvt->collectType; + } + +private: + using PerfEvtPtr = std::shared_ptr; + std::unordered_map procMap; + std::vector cpuList; + std::vector pidList; + std::vector> cpuCounterArray; + std::vector> pidCounterArray; + std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); + std::shared_ptr pmuEvt; + int CollectorDoTask(PerfEvtPtr collector, int task); + int CollectorTaskArrayDoTask(std::vector& taskArray, int task); + + unsigned int numCpu = 0; + unsigned int 
numPid = 0; + int64_t ts = 0; + int prevStat; + int evtStat; +}; + +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b1d61e246ef9ccb3a0d3d79e5013dec6be6eb300 --- /dev/null +++ b/pmu/bpf/perf_counter_bpf.cpp @@ -0,0 +1,489 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Wu + * Create: 2025-08-10 + * Description: implementations for reading performance counters and initializing counting logic in + * the KUNPENG_PMU namespace. + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pmu.h" +#include "linked_list.h" +#include "pfm_event.h" +#include "pmu_event.h" +#include "pcerr.h" +#include "log.h" +#include "sched_counter.skel.h" +#include "sched_cgroup.skel.h" +#include "perf_counter_bpf.h" + +using namespace std; +using namespace pcerr; + +#define MAX_ENTITES 1024 + +static map counterMap; // key: evt name, value: bpf obj +static struct sched_cgroup_bpf *cgrpCounter = nullptr; // one bpf obj in cgroup mode + +static map bpfFdMap; // key: evt name, value: bpf prog fd (tracepoint id). For tracepoint trigger +static map> evtCpuMap; // key: evt name, value: core id (Init). 
+static map> evtPidMap; // key: evt name, value: pid (Init). +static map evtIdxMap; // key: evt name, value: sequential number +static map cgroupIdxMap; // key: cgroup name, value: sequential number +static set evtKeys; // updated fds of cgroup +static set readCgroups; +static set triggerdEvt; +static int evtIdx = 0; +static int cgrpProgFd = 0; + +static inline int TriggeredRead(int prog_fd, int cpu) +{ + //enforce the bpf trace function + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, // no input context + .ctx_size_in = 0, + .retval = 0, // return code of the BPF program + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + ); + return bpf_prog_test_run_opts(prog_fd, &opts); +} + +int KUNPENG_PMU::PerfBpfCounter::BeginRead() +{ + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::EndRead() +{ + triggerdEvt.clear(); + readCgroups.clear(); + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::ReadBpfProcess(std::vector &data) +{ + const unsigned cpuNums = MAX_CPU_NUM; + auto obj = counterMap[this->evt->name]; + + // must execute sched_switch when each read operation. + // the pid may not have been scheduled for a long time and the pmu count will not be recoreded. + if (triggerdEvt.find(this->evt->name) == triggerdEvt.end()) { + for(int i = 0; i < cpuNums; i++) { + int triggerErr = TriggeredRead(bpfFdMap[this->evt->name], i); + if (triggerErr) { + DBG_PRINT("trigger error: %s\n", strerror(-triggerErr)); + } + } + triggerdEvt.insert(this->evt->name); + } + + // read the pmu count of this pid in each cpu core + struct bpf_perf_event_value values[cpuNums]; + + int err = bpf_map__lookup_elem(obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if(err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to lookup counter map accum_readings. 
Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return SUCCESS; + } + + // convert pmu count to PmuData + int processId = 0; + auto findProc = procMap.find(this->pid); + if(findProc != procMap.end()){ + processId = findProc->second->pid; + } + + for(int i = 0; i < cpuNums; i++){ + data.emplace_back(PmuData{0}); + auto ¤t = data.back(); + current.count = values[i].counter; + current.countPercent = values[i].running / values[i].enabled; + current.cpu = i; + current.tid = this->pid; + current.pid = processId; + } + + // reset pmu count in bpf to ensure that the value read from pmu is delta (after last read/open) + memset(values, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value)); + err = bpf_map__update_elem(obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if(err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map accum_readings. Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return -1; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::ReadBpfCgroup(std::vector &data) { + auto cgrpName = this->evt->cgroupName; + if (readCgroups.find(cgrpName) != readCgroups.end()) { + return SUCCESS; + } + readCgroups.insert(cgrpName); + + for (int i=0;iname]; + int err = bpf_map__lookup_elem(cgrpCounter->maps.cgrp_readings, &readKey, sizeof(__u32), values, sizeof(values), BPF_ANY); + if(err){ + string msg = "failed to lookup cgroup map cgrp_readings. 
Error: " + string(strerror(-err)) + " pid " + to_string(this->pid); + New(LIBPERF_ERR_BPF_ACT_FAILED, msg); + return SUCCESS; + } + + for(int i = 0; i < cpuNums; i++){ + data.emplace_back(PmuData{0}); + auto ¤t = data.back(); + current.count = values[i].counter; + current.countPercent = values[i].running / values[i].enabled; + current.cpu = i; + current.tid = this->pid; + current.cgroupName = this->evt->cgroupName.c_str(); + } + + memset(values, 0, cpuNums * sizeof(bpf_perf_event_value)); + err = bpf_map__update_elem(cgrpCounter->maps.cgrp_readings, &readKey, sizeof(__u32), values, sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_ANY); + if(err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup map cgrp_readings. Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return -1; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::Read(EventData &eventData) +{ + if (!evt->cgroupName.empty()) { + return ReadBpfCgroup(eventData.data); + } else { + return ReadBpfProcess(eventData.data); + } +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +int KUNPENG_PMU::PerfBpfCounter::InitPidForEvent() +{ + if (this->pid == -1) { + return SUCCESS; + } + + if (evtPidMap[this->evt->name].find(this->pid) != evtPidMap[this->evt->name].end()) { + return SUCCESS; + } + + auto findObj = counterMap.find(this->evt->name); + if(findObj == counterMap.end()){ + return -1; + } + + // initialize the cumulative pmu count for this pid + struct bpf_perf_event_value evtVal[MAX_CPU_NUM]; + + memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value)); + int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal, + sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map accum_readings. 
Error: " + err); + return -1; + } + + // initialize the filter, build the map relationship of pid and accum_key + err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map filter. Error: " + err); + return -1; + } + DBG_PRINT("InitPidForEvent: %d\n", pid); + evtPidMap[this->evt->name].insert(this->pid); + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::InitBpfObj() +{ + int err; + struct sched_counter_bpf *obj; + auto findObj = counterMap.find(evt->name); + if(findObj == counterMap.end()){ + // initialize the bpf obj + obj = sched_counter_bpf__open(); + if(!obj){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to open counter bpf obj"); + return -1; + } + err = bpf_map__set_max_entries(obj->maps.events, MAX_CPU_NUM); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: events"); + return -1; + } + err = bpf_map__set_max_entries(obj->maps.prev_readings, 1); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: prev_readings"); + return -1; + } + err = bpf_map__set_max_entries(obj->maps.accum_readings, 1024); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: accum_readings"); + return -1; + } + err = bpf_map__set_max_entries(obj->maps.filter, 1024); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: filter"); + return -1; + } + + err = sched_counter_bpf__load(obj); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to load counter bpf obj"); + return -1; + } + + err = sched_counter_bpf__attach(obj); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to attach counter bpf obj"); + return -1; + } + + counterMap[this->evt->name] = obj; + InitPidForEvent(); + + // get the fd of bpf prog, trigger trace function(sched_switch) of bpf in read + int progFd = bpf_program__fd(obj->progs.on_switch); + + 
bpfFdMap[this->evt->name] = progFd; + DBG_PRINT("create bpf obj for evt %s prog fd %d\n", evt->name.c_str(), progFd); + } else { + obj = counterMap[this->evt->name]; + } + + // initialize the pmu count, put fd of pmu into value + err = bpf_map__update_elem(obj->maps.events, &this->cpu, sizeof(__u32), &this->fd, sizeof(int), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map events. Error: " + + string(strerror(-err)) + "cpu " + to_string(cpu) + "fd " + to_string(fd)); + return -1; + } + + evtCpuMap[this->evt->name].insert(this->cpu); + + return 0; +} + +static int ReadCgroupId(const string &cgroupName) +{ + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + string fullCgroupPath = GetCgroupPath(cgroupName); + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { + return -1; + } + + return handle.cgroup_id; +} + +int KUNPENG_PMU::PerfBpfCounter::InitBpfCgroupObj() +{ + int err; + struct sched_cgroup_bpf *obj; + if(cgrpCounter == nullptr){ + obj = sched_cgroup_bpf__open(); + if(!obj){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to open cgroup bpf obj"); + return -1; + } + + obj->rodata->num_cpus = MAX_CPU_NUM; + obj->rodata->num_events = this->evt->numEvent; + + err = bpf_map__set_max_entries(obj->maps.events, MAX_ENTITES); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: events"); + return -1; + } + + err = bpf_map__set_max_entries(obj->maps.prev_readings, MAX_ENTITES); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: prev_readings"); + return -1; + } + + err = bpf_map__set_max_entries(obj->maps.cgrp_idx, MAX_ENTITES); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: cgrp_idx"); + return -1; + } + + err = 
bpf_map__set_max_entries(obj->maps.cgrp_readings, MAX_ENTITES); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: cgrp_readings"); + return -1; + } + + err = sched_cgroup_bpf__load(obj); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to load cgroup bpf obj"); + return -1; + } + + err = sched_cgroup_bpf__attach(obj); + if(err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to attach cgroup bpf obj"); + return -1; + } + + cgrpProgFd = bpf_program__fd(obj->progs.trigger_read); + cgrpCounter = obj; + DBG_PRINT("create bpf obj for cgroup evt %s \n", evt->name.c_str()); + } + + auto findEvtIdx = evtIdxMap.find(evt->name); + if (findEvtIdx == evtIdxMap.end()) { + evtIdxMap[evt->name] = evtIdx; + evtIdx++; + } + int evtKey = evtIdxMap[evt->name] * MAX_CPU_NUM + cpu; + if (evtKeys.find(evtKey) == evtKeys.end()) { + err = bpf_map__update_elem(cgrpCounter->maps.events, &evtKey, sizeof(__u32), + &this->fd, sizeof(int), BPF_ANY); + if(err){ + printf("failed to update elem. 
err %s cpu %d fd %d\n", strerror(-err), cpu, fd); + return -1; + } + evtKeys.insert(evtKey); + } + + string cgrpName = this->evt->cgroupName; + auto findCgrp = cgroupIdxMap.find(cgrpName); + if(findCgrp == cgroupIdxMap.end()) { + uint64_t cgrpId = ReadCgroupId(cgrpName); + int cgrpIdx = cgroupIdxMap.size(); + err = bpf_map__update_elem(cgrpCounter->maps.cgrp_idx, &cgrpId, sizeof(__u64), + &cgrpIdx, sizeof(__u32), BPF_ANY); + if(err){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup id: " + cgrpId); + return -1; + } + DBG_PRINT("init cgroup bpf map: %s id: %d\n",cgrpName.c_str(), cgrpId); + cgroupIdxMap[cgrpName] = cgrpIdx; + } + + evtCpuMap[this->evt->name].insert(this->cpu); + return 0; +} + +int KUNPENG_PMU::PerfBpfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +{ + InitPidForEvent(); + auto findCpuMap = evtCpuMap.find(this->evt->name); + auto findCgroup = cgroupIdxMap.find(this->evt->cgroupName); + if(findCpuMap != evtCpuMap.end() && findCpuMap->second.count(this->cpu) && findCgroup != cgroupIdxMap.end()) { + return SUCCESS; + } + int err; + if (findCpuMap == evtCpuMap.end() || !findCpuMap->second.count(this->cpu)) { + err = this->MapPerfAttr(groupEnable, groupFd); + if (err != SUCCESS) { + return err; + } + } + + if (this->evt->cgroupName.empty()) { + err = InitBpfObj(); + } else { + err = InitBpfCgroupObj(); + } + return err; +} + +int KUNPENG_PMU::PerfBpfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) +{ + struct perf_event_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = this->evt->type; + attr.config = this->evt->config; + attr.config1 = this->evt->config1; + attr.config2 = this->evt->config2; + attr.disabled = 1; + + // support cgroup feature + unsigned flags = 0; + if (this->GetCgroupFd() != -1) { + flags = PERF_FLAG_PID_CGROUP | PERF_FLAG_FD_CLOEXEC; + this->pid = this->GetCgroupFd(); + } + + attr.read_format = 
PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); + if (__glibc_unlikely(this->fd < 0)) { + return MapErrno(errno); + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::Enable() +{ + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfBpfCounter::Disable() +{ + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfBpfCounter::Reset() +{ + return PerfEvt::Reset(); +} + +int KUNPENG_PMU::PerfBpfCounter::Close() +{ + if (this->fd > 0) { + close(this->fd); + } + return SUCCESS; +} \ No newline at end of file diff --git a/pmu/bpf/perf_counter_bpf.h b/pmu/bpf/perf_counter_bpf.h new file mode 100644 index 0000000000000000000000000000000000000000..74f1a3415828550cb2bbc6a590c63159046c302b --- /dev/null +++ b/pmu/bpf/perf_counter_bpf.h @@ -0,0 +1,53 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: declaration of class PerfBpfCounter that inherits from PerfCounter and provides implementations + * for initializing, reading, and mapping performance counter attributes in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_COUNTER_BPF_H +#define PMU_COUNTER_BPF_H + +#include +#include +#include +#include "evt.h" +#include "pmu_event.h" +#include "perf_counter.h" + +#define AT_FDCWD -100 + +namespace KUNPENG_PMU { + class PerfBpfCounter : public PerfCounter { + public: + using PerfCounter::PerfCounter; + ~PerfBpfCounter() + {} + int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; + int Read(EventData &eventData) override; + int MapPerfAttr(const bool groupEnable, const int groupFd) override; + int Enable() override; + int Disable() override; + int Reset() override; + int Close() override; + + int BeginRead(); + int EndRead(); + + private: + int InitBpfObj(); + int InitBpfCgroupObj(); + int InitPidForEvent(); + int ReadBpfProcess(std::vector &data); + int ReadBpfCgroup(std::vector &data); + }; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/bpf/sched_cgroup.bpf.c b/pmu/bpf/sched_cgroup.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..d044db821ff58112502ca3305b1c241f17331279 --- /dev/null +++ b/pmu/bpf/sched_cgroup.bpf.c @@ -0,0 +1,191 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Wu + * Create: 2025-08-10 + * Description: the bpf program for cgroup collecting in counting mode + ******************************************************************************/ +#include +#include +#include +#include + +#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary +#define MAX_EVENTS 128 // max events per cgroup: arbitrary + +// single set of global perf events to measure +// {evt0, cpu0}, {evt0, cpu1}, {evt0, cpu2}...{evt0, cpuM}, {evt1, cpu0}...{evtM, cpuM} +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} events SEC(".maps"); + +// from cgroup id to event index +// key: cgroup id from OS +// value: internal id from 0...M +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 1024); +} cgrp_idx SEC(".maps"); + +// per-cpu event snapshots to calculate delta +// {evt0}, {evt1}...{evtM} +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} prev_readings SEC(".maps"); + +// aggregated event values for each cgroup (per-cpu) +// will be read from the user-space +// {cgrp0, evt0, cpu0}, {cgrp0, evt0, cpu1}...{cgrp0, evt0, cpuM}, {cgrp0, evt1, cpu0}...{cgrp0, evtM, cpuM}...{cgrpM, evtM, cpuM} +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); +} cgrp_readings SEC(".maps"); + +const volatile __u32 num_events = 
1; +const volatile __u32 num_cpus = 1; + +static inline int get_cgroup_idx(__u32 *cgrps, int size) +{ + struct task_struct *p = (void *)bpf_get_current_task(); + struct cgroup *cgrp; + register int i = 0; + __u32 *elem; + int level; + int cnt; + + cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup); + level = BPF_CORE_READ(cgrp, level); + bpf_printk("perf_event_cgrp_id %d level %d", perf_event_cgrp_id, level); + + for (cnt = 0; i < MAX_LEVELS; i++) { + __u64 cgrp_id; + + if (i > level) { + break; + } + + // convert cgroup-id to a map index + if (bpf_core_field_exists(cgrp->ancestor_ids)) { + cgrp_id = BPF_CORE_READ(cgrp, ancestor_ids[i]); + } else { + bpf_printk("cannot get ancestor_ids"); + return 0; + } + bpf_printk("cgrp_id %d level %d", cgrp_id, level); + elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id); + if (!elem) { + continue; + } + + cgrps[cnt++] = *elem; + if (cnt == size) { + break; + } + } + bpf_printk("cnt %d", cnt); + return cnt; +} + +static int bperf_cgroup_count(void) +{ + register __u32 idx = 0; // to have it in a register to pass BPF verifier + register int c = 0; + struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 cgrp_idx[MAX_LEVELS]; + int cgrp_cnt; + __u32 key, cgrp; + long err; + + cgrp_cnt = get_cgroup_idx(cgrp_idx, MAX_LEVELS); + + for ( ; idx < MAX_EVENTS; idx++) { + bpf_printk("idx: %d num_events: %d", idx, num_events); + if (idx == num_events) + break; + + // XXX: do not pass idx directly (for verifier) + key = idx; + // this is per-cpu array for diff + prev_val = bpf_map_lookup_elem(&prev_readings, &key); + if (!prev_val) { + val.counter = val.enabled = val.running = 0; + bpf_printk("!prev_val update"); + bpf_map_update_elem(&prev_readings, &key, &val, BPF_ANY); + + prev_val = bpf_map_lookup_elem(&prev_readings, &key); + if (!prev_val) { + bpf_printk("!prev_val continue"); + return 0; + } + } + bpf_printk("prev_val counting: %ld prev_val enabled: %ld\n", 
prev_val->counter, prev_val->enabled); + // read from global perf_event array + key = idx * num_cpus + cpu; + bpf_printk("key: %d", key); + err = bpf_perf_event_read_value(&events, key, &val, sizeof(val)); + if (err) { + bpf_printk("!bpf_perf_event_read_value, continue"); + continue; + } + + delta.counter = val.counter - prev_val->counter; + delta.enabled = val.enabled - prev_val->enabled; + delta.running = val.running - prev_val->running; + bpf_printk("val counting: %ld val enabled: %ld\n", val.counter, val.enabled); + bpf_printk("delta counting: %ld delta enabled: %ld\n", delta.counter, delta.enabled); + for (c = 0; c < MAX_LEVELS; c++) { + if (c == cgrp_cnt) + break; + cgrp = cgrp_idx[c]; + + // aggregate the result by cgroup + key = cgrp * num_events + idx; + bpf_printk("c: %d cgrp: %d key: %d", c, cgrp, key); + cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key); + if (cgrp_val) { + cgrp_val->counter += delta.counter; + cgrp_val->enabled += delta.enabled; + cgrp_val->running += delta.running; + bpf_printk("cgrp_val counting: %ld cgrp_val counting: %ld\n", cgrp_val->counter, cgrp_val->counter); + } else { + bpf_printk("!cgrp_val"); + bpf_map_update_elem(&cgrp_readings, &key, + &delta, BPF_ANY); + } + } + + *prev_val = val; + } + return 0; +} + +// This will be attached to cgroup-switches event for each cpu +SEC("perf_event") +int BPF_PROG(on_cgrp_switch) +{ + return bperf_cgroup_count(); +} + +SEC("raw_tp/sched_switch") +int BPF_PROG(trigger_read) +{ + return bperf_cgroup_count(); +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; \ No newline at end of file diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..891d81379dd57725dab64eafd1f2165368a68a13 --- /dev/null +++ b/pmu/bpf/sched_counter.bpf.c @@ -0,0 +1,121 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Wu + * Create: 2025-08-10 + * Description: the bpf program for ordinary or multi-thread program collecting in counting mode + ******************************************************************************/ +#include +#include +#include +#include + +char LICENSE[] SEC("license") = "GPL"; + +#define MAX_ENTRIES 102400 + +// system pmu count. key: pid, value : count of each core +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} events SEC(".maps"); + +// system pmu count at last time sched_switch was triggered +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} prev_readings SEC(".maps"); + +// accumulated pmu count of pid. key: accum_key, value: count of each core +// If the pid creates a child process/thread, they use the same accum key as this pid and their pmu events accumulated it +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1024); +} accum_readings SEC(".maps"); + +// check whether to record pmu value. 
key: pid, value: accum_key +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} filter SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(on_switch) { + __u32 pid; + __u32 zero=0; + __u32 *accum_key; + __u32 cpu = bpf_get_smp_processor_id(); + long err; + struct bpf_perf_event_value cur_val, *prev_val, *accum_val; + + prev_val = bpf_map_lookup_elem(&prev_readings, &zero); + if(!prev_val){ + bpf_printk("failed to bpf_map_lookup_elem prev_readings.\n"); + return 0; + } + + // get pmu value by API of bpf + err = bpf_perf_event_read_value(&events, BPF_F_CURRENT_CPU, &cur_val, sizeof(struct bpf_perf_event_value)); + if(err){ + bpf_printk("failed to bpf_event_read_value: %d cpu %d\n", err, cpu); + return 0; + } + pid = bpf_get_current_pid_tgid() & 0xffffffff; + accum_key = bpf_map_lookup_elem(&filter, &pid); + if (!accum_key) { + *prev_val = cur_val; return 0; + } + + accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + if (!accum_val) { + *prev_val = cur_val; + return 0; + } + + accum_val->counter += cur_val.counter - prev_val->counter; + accum_val->enabled += cur_val.enabled - prev_val->enabled; + accum_val->running += cur_val.running - prev_val->running; + bpf_printk("counting: %ld enabled: %ld running: %ld\n", accum_val->counter, accum_val->enabled, accum_val->running); + bpf_printk("cur_val counting: %ld prev_val counting: %ld\n", cur_val.counter, prev_val->counter); + + *prev_val = cur_val; + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){ + long err; + __u32 new_pid; + __u32 parent_pid; + __u32 *accum_key; + struct bpf_perf_event_value *accum_val; + + parent_pid = bpf_get_current_pid_tgid() & 0xffffffff; + new_pid = task->pid; + + bpf_printk("new pid: %d parent: %d\n",new_pid,parent_pid); + accum_key = bpf_map_lookup_elem(&filter, &parent_pid); + if(!accum_key){
+ return 0; + } + + bpf_map_update_elem(&filter, &new_pid, accum_key, BPF_NOEXIST); + bpf_printk("add child: %ld accum_key: %ld\n",new_pid, *accum_key); + return 0; +} \ No newline at end of file diff --git a/pmu/dummy_event.cpp b/pmu/dummy_event.cpp index 9137db89afa8c9af634139b94b5da5c2b357be97..1e79a969c3c621ea4d4f9f8b7b5e58c018bc5882 100644 --- a/pmu/dummy_event.cpp +++ b/pmu/dummy_event.cpp @@ -42,7 +42,7 @@ namespace KUNPENG_PMU { } } - std::pair> DummyEvent::GetEvtGroupState(const int groupId, std::shared_ptr evtList, groupMapPtr eventGroupInfoMap) + std::pair> DummyEvent::GetEvtGroupState(const int groupId, std::shared_ptr evtList, groupMapPtr eventGroupInfoMap) { if (groupId == -1 || eventGroupInfoMap == nullptr) { return std::make_pair(false, nullptr); diff --git a/pmu/dummy_event.h b/pmu/dummy_event.h index dc25970c37ea255644e1790d94a279d7c0313e16..674c8a2964c1929b296352be7582f9171bd3d298 100644 --- a/pmu/dummy_event.h +++ b/pmu/dummy_event.h @@ -20,25 +20,25 @@ #include #include #include "pcerr.h" -#include "evt_list.h" +#include "evt_list_default.h" namespace KUNPENG_PMU { struct DummyContext { - std::shared_ptr evtList; + std::shared_ptr evtList; pid_t pid; bool groupEnable; - std::shared_ptr evtLeader; + std::shared_ptr evtLeader; }; class DummyEventStrategy { public: - virtual void DoHandler(DummyContext& ctx, const bool groupEnable, const std::shared_ptr evtLeader) = 0; + virtual void DoHandler(DummyContext& ctx, const bool groupEnable, const std::shared_ptr evtLeader) = 0; }; class ProcessForkStrategy : public DummyEventStrategy { public: - void DoHandler(DummyContext& ctx, const bool groupEnable, const std::shared_ptr evtLeader) + void DoHandler(DummyContext& ctx, const bool groupEnable, const std::shared_ptr evtLeader) { ctx.evtList->AddNewProcess(ctx.pid, groupEnable, evtLeader); } @@ -46,7 +46,7 @@ namespace KUNPENG_PMU { class DummyEvent { public: - DummyEvent(std::vector>& evtLists, std::vector& ppids, groupMapPtr& eventGroupInfoMap) : + 
DummyEvent(std::vector>& evtLists, std::vector& ppids, groupMapPtr& eventGroupInfoMap) : evtLists(evtLists), ppids(ppids), eventGroupInfoMap(eventGroupInfoMap), @@ -54,7 +54,7 @@ namespace KUNPENG_PMU { ~DummyEvent(); - std::pair> GetEvtGroupState(const int groupId, std::shared_ptr evtList, groupMapPtr eventGroupInfoMap); + std::pair> GetEvtGroupState(const int groupId, std::shared_ptr evtList, groupMapPtr eventGroupInfoMap); /** * @brief start a thread to observe fork thread. @@ -67,7 +67,7 @@ namespace KUNPENG_PMU { volatile std::atomic dummyFlag; - std::vector>& evtLists; + std::vector>& evtLists; std::vector ppids; groupMapPtr eventGroupInfoMap; std::vector exitPids; diff --git a/pmu/evt.cpp b/pmu/evt.cpp index c9a27ae6e741d56c95b2fe35a9e9aa1eab04c55f..8e523bf7bc8737a79ce0612d9018047426b0a8e2 100644 --- a/pmu/evt.cpp +++ b/pmu/evt.cpp @@ -40,7 +40,8 @@ int KUNPENG_PMU::PerfEvt::Enable() { if (ioctl(this->fd, PERF_EVENT_IOC_ENABLE, 0) == 0) { return SUCCESS; } - return LIBPERF_ERR_FAILED_PMU_ENABLE; + // NOTE(review): propagate enable failures to callers instead of masking them. + return LIBPERF_ERR_FAILED_PMU_ENABLE; } int KUNPENG_PMU::PerfEvt::Reset() diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 50cc52295e07b7210f581aefd8074b4121b0ef11..84b4c4f21bae075efebbb21ab3253ca1eb0c9c4a 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -22,142 +22,25 @@ #include #include #include "cpu_map.h" -#include "perf_counter.h" +#include "perf_counter_default.h" #include "pmu.h" #include "process_map.h" #include "sampler.h" #include "spe_sampler.h" namespace KUNPENG_PMU { -enum PmuTask { - START = 0, - PAUSE = 1, - DISABLE = 2, - ENABLE = 3, - RESET = 4, - OPEN = 5, - CLOSE = 6, - INIT = 7, - READ = 8, - STOP = 9, -}; - -enum class UncoreState { - InitState = 0b01, - OnlyUncore = 0b11, - HasUncore = 0b10, - OnlyOther = 0b01, -}; -struct EventGroupInfo; class EvtList { public: - using ProcPtr = std::shared_ptr; - using CpuPtr = std::shared_ptr; - EvtList(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, -
std::shared_ptr pmuEvt, const int groupId) - : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), groupId(groupId) - { - this->numCpu = this->cpuList.size(); - this->numPid = this->pidList.size(); - this->prevStat = OPEN; - this->evtStat = OPEN; - } - int Init(const bool groupEnable, const std::shared_ptr evtLeader); - int Pause(); - int Close(); - int Start(); - int Enable(); - int Stop(); - int Reset(); - int Read(EventData &eventData); - - void SetGroupInfo(const EventGroupInfo &grpInfo); - - void SetTimeStamp(const int64_t& timestamp) - { - this->ts = timestamp; - } - - void SetBranchSampleFilter(const unsigned long& branchSampleFilter) - { - this->branchSampleFilter = branchSampleFilter; - } - - std::set GetFdList() const - { - return fdList; - } - - int GetEvtType() const - { - return pmuEvt->collectType; - } - - int GetPmuType() const - { - return pmuEvt->pmuType; - } - - int GetGroupId() const - { - return groupId; - } - - int GetBlockedSample() const - { - return pmuEvt->blockedSample; - } - - void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader); - void ClearExitFd(); -private: - using PerfEvtPtr = std::shared_ptr; - - int CollectorDoTask(PerfEvtPtr collector, int task); - int CollectorXYArrayDoTask(std::vector>& xyArray, int task); - void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, - std::vector& pmuData); - void AdaptErrInfo(int err, PerfEvtPtr perfEvt); - - std::vector cpuList; - std::vector pidList; - std::vector unUsedPidList; - std::set noProcList; - std::shared_ptr pmuEvt; - int groupId; // event group id - std::vector>> xyCounterArray; - std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); - unsigned int numCpu = 0; - unsigned int numPid = 0; - std::set fdList; - int64_t ts = 0; - std::unordered_map procMap; - SymbolMode symMode = NO_SYMBOL_RESOLVE; - unsigned long branchSampleFilter = KPERF_NO_BRANCH_SAMPLE; - int prevStat; - int 
evtStat; - std::mutex mutex; - // Fixme: decouple group event with normal event, use different classes to implement Read and Init. - std::unique_ptr groupInfo = nullptr; -}; - -struct EventGroupInfo { - // store event group leader info - std::shared_ptr evtLeader; - // store event group child events info - std::vector> evtGroupChildList; - // store event group child events state flag info - /* event group child state explain: - * Enumeration variable uncoreState has four state, Initialization is the InitState; - * scan the event List, if found the uncore event, the uncoreState is configured with the high bit set to 1; - * if find the other event, the uncoreState is config the low bit set to 0. - */ - enum class UncoreState uncoreState; + EvtList(){} + int Init(); + virtual int Start() = 0; + virtual int Stop() = 0; + virtual int Read(EventData &eventData) = 0; + virtual int Enable() = 0; + virtual int Reset() = 0; + virtual int Close() = 0; }; -// store event group id and event group info -using groupMapPtr = std::shared_ptr>; - } // namespace KUNPENG_PMU #endif diff --git a/pmu/evt_list.cpp b/pmu/evt_list_default.cpp similarity index 90% rename from pmu/evt_list.cpp rename to pmu/evt_list_default.cpp index 7e93c0db746550729aae80501b5bbfd4a8ca4c88..0429a79eedf2b99444e55726f73b2ae4d4cfd65b 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list_default.cpp @@ -21,11 +21,11 @@ #include "pcerr.h" #include "log.h" #include "common.h" -#include "evt_list.h" +#include "evt_list_default.h" using namespace std; -int KUNPENG_PMU::EvtList::CollectorDoTask(PerfEvtPtr collector, int task) +int KUNPENG_PMU::EvtDefaultList::CollectorDoTask(PerfEvtPtr collector, int task) { switch (task) { case START: @@ -50,7 +50,7 @@ int KUNPENG_PMU::EvtList::CollectorDoTask(PerfEvtPtr collector, int task) } } -int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector>& xyArray, int task) +int KUNPENG_PMU::EvtDefaultList::CollectorXYArrayDoTask(std::vector>& xyArray, int task) { std::unique_lock 
lock(mutex); for (auto row: xyArray) { @@ -66,7 +66,7 @@ int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector evtLeader) +int KUNPENG_PMU::EvtDefaultList::Init(const bool groupEnable, const std::shared_ptr evtLeader) { // Init process map. for (auto& proc: pidList) { @@ -157,22 +157,22 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrxyCounterArray, START); } -int KUNPENG_PMU::EvtList::Enable() +int KUNPENG_PMU::EvtDefaultList::Enable() { return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); } -int KUNPENG_PMU::EvtList::Stop() +int KUNPENG_PMU::EvtDefaultList::Stop() { return CollectorXYArrayDoTask(this->xyCounterArray, STOP); } -int KUNPENG_PMU::EvtList::Close() +int KUNPENG_PMU::EvtDefaultList::Close() { auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); if (ret != SUCCESS) { @@ -183,12 +183,12 @@ int KUNPENG_PMU::EvtList::Close() return SUCCESS; } -int KUNPENG_PMU::EvtList::Reset() +int KUNPENG_PMU::EvtDefaultList::Reset() { return CollectorXYArrayDoTask(this->xyCounterArray, RESET); } -void KUNPENG_PMU::EvtList::FillFields( +void KUNPENG_PMU::EvtDefaultList::FillFields( const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) { for (auto i = start; i < end; ++i) { @@ -211,7 +211,7 @@ void KUNPENG_PMU::EvtList::FillFields( } } -int KUNPENG_PMU::EvtList::Read(EventData &eventData) +int KUNPENG_PMU::EvtDefaultList::Read(EventData &eventData) { std::unique_lock lg(mutex); @@ -256,16 +256,16 @@ int KUNPENG_PMU::EvtList::Read(EventData &eventData) return SUCCESS; } -int KUNPENG_PMU::EvtList::Pause() +int KUNPENG_PMU::EvtDefaultList::Pause() { return CollectorXYArrayDoTask(this->xyCounterArray, PAUSE); } -std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) +std::shared_ptr KUNPENG_PMU::EvtDefaultList::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) { switch (pmuEvent->collectType) { case (COUNTING): - return std::make_shared(cpu, pid, 
pmuEvent, procMap); + return std::make_shared(cpu, pid, pmuEvent, procMap); case (SAMPLING): return std::make_shared(cpu, pid, pmuEvent, procMap); case (SPE_SAMPLING): @@ -275,7 +275,7 @@ std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, }; } -void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) +void KUNPENG_PMU::EvtDefaultList::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) { if (pid <= 0 || evtStat == CLOSE || evtStat == STOP) { return; @@ -339,7 +339,7 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons } } -void KUNPENG_PMU::EvtList::ClearExitFd() +void KUNPENG_PMU::EvtDefaultList::ClearExitFd() { if (this->pidList.size() == 1 && this->pidList[0]->tid == -1) { return; @@ -390,7 +390,7 @@ void KUNPENG_PMU::EvtList::ClearExitFd() noProcList.clear(); } -void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +void KUNPENG_PMU::EvtDefaultList::SetGroupInfo(const EventGroupInfo &grpInfo) { this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); } \ No newline at end of file diff --git a/pmu/evt_list_default.h b/pmu/evt_list_default.h new file mode 100644 index 0000000000000000000000000000000000000000..f1fc946b2b92dd15a6cd23e3030796262d31c4ef --- /dev/null +++ b/pmu/evt_list_default.h @@ -0,0 +1,163 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. 
+ * See the Mulan PSL v2 for more details. + * Author: Mr.Zhang + * Create: 2024-04-03 + * Description: declaration of class EvtList with functions for managing and interacting with a list + * of performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_EVTDEFAULTLIST_H +#define PMU_EVTDEFAULTLIST_H +#include +#include +#include +#include +#include +#include +#include "cpu_map.h" +#include "perf_counter_default.h" +#include "pmu.h" +#include "process_map.h" +#include "sampler.h" +#include "spe_sampler.h" +#include "evt_list.h" +namespace KUNPENG_PMU { +enum PmuTask { + START = 0, + PAUSE = 1, + DISABLE = 2, + ENABLE = 3, + RESET = 4, + OPEN = 5, + CLOSE = 6, + INIT = 7, + READ = 8, + STOP = 9, +}; + +enum class UncoreState { + InitState = 0b01, + OnlyUncore = 0b11, + HasUncore = 0b10, + OnlyOther = 0b01, +}; + +struct EventGroupInfo; +class EvtDefaultList : public EvtList { +public: + using ProcPtr = std::shared_ptr; + using CpuPtr = std::shared_ptr; + EvtDefaultList(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, + std::shared_ptr pmuEvt, const int groupId) + : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), groupId(groupId) + { + this->numCpu = this->cpuList.size(); + this->numPid = this->pidList.size(); + this->prevStat = OPEN; + this->evtStat = OPEN; + } + int Init(const bool groupEnable, const std::shared_ptr evtLeader); + int Pause(); + int Close() override; + int Start() override; + int Enable() override; + int Stop() override; + int Reset() override; + int Read(EventData &eventData) override; + + void SetGroupInfo(const EventGroupInfo &grpInfo); + + void SetTimeStamp(const int64_t& timestamp) + { + this->ts = timestamp; + } + + void SetBranchSampleFilter(const unsigned long& branchSampleFilter) + { + this->branchSampleFilter = branchSampleFilter; + } + + std::set GetFdList() const + { + return fdList; + } + + int 
GetEvtType() const + { + return pmuEvt->collectType; + } + + int GetPmuType() const + { + return pmuEvt->pmuType; + } + + int GetGroupId() const + { + return groupId; + } + + int GetBlockedSample() const + { + return pmuEvt->blockedSample; + } + + void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader); + void ClearExitFd(); +private: + using PerfEvtPtr = std::shared_ptr; + + int CollectorDoTask(PerfEvtPtr collector, int task); + int CollectorXYArrayDoTask(std::vector>& xyArray, int task); + void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, + std::vector& pmuData); + void AdaptErrInfo(int err, PerfEvtPtr perfEvt); + + std::vector cpuList; + std::vector pidList; + std::vector unUsedPidList; + std::set noProcList; + std::shared_ptr pmuEvt; + int groupId; // event group id + std::vector>> xyCounterArray; + std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); + unsigned int numCpu = 0; + unsigned int numPid = 0; + std::set fdList; + int64_t ts = 0; + std::unordered_map procMap; + SymbolMode symMode = NO_SYMBOL_RESOLVE; + unsigned long branchSampleFilter = KPERF_NO_BRANCH_SAMPLE; + int prevStat; + int evtStat; + std::mutex mutex; + // Fixme: decouple group event with normal event, use different classes to implement Read and Init. + std::unique_ptr groupInfo = nullptr; +}; + +struct EventGroupInfo { + // store event group leader info + std::shared_ptr evtLeader; + // store event group child events info + std::vector> evtGroupChildList; + // store event group child events state flag info + /* event group child state explain: + * Enumeration variable uncoreState has four state, Initialization is the InitState; + * scan the event List, if found the uncore event, the uncoreState is configured with the high bit set to 1; + * if find the other event, the uncoreState is config the low bit set to 0. 
+ */ + enum class UncoreState uncoreState; +}; + +// store event group id and event group info +using groupMapPtr = std::shared_ptr>; + +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index cdb6ddd95cb2041787048842e01f9a9c5053a61f..fe973638a906f65a0af8613f4c105a91414f12fd 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -23,55 +23,18 @@ #include "evt.h" #include "pmu_event.h" -#define REQUEST_USER_ACCESS 0x2 - -struct ReadFormat { - __u64 value; - __u64 timeEnabled; - __u64 timeRunning; - __u64 id; -}; - namespace KUNPENG_PMU { - static constexpr int COUNT_PAGE_SIZE = 4096; class PerfCounter : public PerfEvt { public: using PerfEvt::PerfEvt; - ~PerfCounter() - {} - int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; - int Read(EventData &eventData) override; - int MapPerfAttr(const bool groupEnable, const int groupFd) override; - int Enable() override; - int Disable() override; - int Reset() override; - int Close() override; - - private: - enum class GroupStatus - { - NO_GROUP, - GROUP_LEADER, - GROUP_MEMBER - }; - int Mmap(); - int MapPerfAttrUserAccess(); - int CountValueToData(const __u64 value, const __u64 timeEnabled, - const __u64 timeRunning, __u64 &accumCount, std::vector &data); - int ReadSingleEvent(std::vector &data); - int ReadGroupEvents(std::vector &data); - - // Accumulated pmu count, time enabled and time running. - __u64 enabled = 0; - __u64 running = 0; - // For group events, is the accum counts of all members. - // For normal events, has only one element. 
- std::vector<__u64> accumCount; - int groupFd = 0; - GroupStatus groupStatus = GroupStatus::NO_GROUP; - // reg index is stored in countMmap->base - std::shared_ptr countMmap = nullptr; - bool isCollect{false}; + virtual ~PerfCounter() = default; + virtual int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) = 0; + virtual int Read(EventData &eventData) = 0; + virtual int MapPerfAttr(const bool groupEnable, const int groupFd) =0; + virtual int Enable() = 0; + virtual int Disable() = 0; + virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter_default.cpp similarity index 94% rename from pmu/perf_counter.cpp rename to pmu/perf_counter_default.cpp index e3640ff2e5ee68c9f4b931114f501168938e170b..e752fd454e88688768026953b9dc4d1742526507 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter_default.cpp @@ -29,7 +29,7 @@ #include "pmu_event.h" #include "pcerr.h" #include "log.h" -#include "perf_counter.h" +#include "perf_counter_default.h" #include "read_reg.h" #include "common.h" @@ -53,7 +53,7 @@ struct GroupReadFormat { * Right now we do not implement grouping logic, thus we ignore the * PERF_FORMAT_ID section for now */ -int KUNPENG_PMU::PerfCounter::Read(EventData &eventData) +int KUNPENG_PMU::PerfDefaultCounter::Read(EventData &eventData) { if (__glibc_unlikely(this->fd < 0)) { this->accumCount.clear(); @@ -141,7 +141,7 @@ static int PerfMmapReadSelf(const std::shared_ptr &countMmap, struct R } } // namespace KUNPENG_PMU -int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +int KUNPENG_PMU::PerfDefaultCounter::ReadSingleEvent(std::vector &data) { ReadFormat perfCountValue; if (this->evt->enableUserAccess) { @@ -178,7 +178,7 @@ int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) return SUCCESS; } -int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +int KUNPENG_PMU::PerfDefaultCounter::ReadGroupEvents(std::vector 
&data) { // Fixme: // In current class, we do not know how many events in group. @@ -219,7 +219,7 @@ int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) return SUCCESS; } -int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 timeEnabled, +int KUNPENG_PMU::PerfDefaultCounter::CountValueToData(const __u64 value, const __u64 timeEnabled, const __u64 timeRunning, __u64 &accumCount, vector &data) { if (value < accumCount || timeEnabled < enabled || timeRunning < running) { @@ -262,7 +262,7 @@ int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 ti /** * Initialize counting */ -int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +int KUNPENG_PMU::PerfDefaultCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) { int err = SUCCESS; if (this->evt->enableUserAccess) { // user access @@ -277,7 +277,7 @@ int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, co return err; } -int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) +int KUNPENG_PMU::PerfDefaultCounter::MapPerfAttr(const bool groupEnable, const int groupFd) { /** * For now, we only implemented the logic for CORE type events. 
Support for UNCORE PMU events will be @@ -352,7 +352,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou return SUCCESS; } -int KUNPENG_PMU::PerfCounter::MapPerfAttrUserAccess() +int KUNPENG_PMU::PerfDefaultCounter::MapPerfAttrUserAccess() { struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); @@ -375,7 +375,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttrUserAccess() return SUCCESS; } -int KUNPENG_PMU::PerfCounter::Mmap() +int KUNPENG_PMU::PerfDefaultCounter::Mmap() { this->countMmap = std::make_shared(); this->countMmap->prev = 0; @@ -395,7 +395,7 @@ int KUNPENG_PMU::PerfCounter::Mmap() /** * Enable */ -int KUNPENG_PMU::PerfCounter::Enable() +int KUNPENG_PMU::PerfDefaultCounter::Enable() { if (groupFd != -1) { // Only group leader should use ioctl to enable, disable or reset, @@ -416,7 +416,7 @@ int KUNPENG_PMU::PerfCounter::Enable() return SUCCESS; } -int KUNPENG_PMU::PerfCounter::Disable() +int KUNPENG_PMU::PerfDefaultCounter::Disable() { if (groupFd != -1) { return SUCCESS; @@ -428,7 +428,7 @@ int KUNPENG_PMU::PerfCounter::Disable() return err; } -int KUNPENG_PMU::PerfCounter::Reset() +int KUNPENG_PMU::PerfDefaultCounter::Reset() { if (groupFd != -1) { return SUCCESS; @@ -436,7 +436,7 @@ int KUNPENG_PMU::PerfCounter::Reset() return PerfEvt::Reset(); } -int KUNPENG_PMU::PerfCounter::Close() +int KUNPENG_PMU::PerfDefaultCounter::Close() { if (this->countMmap && this->countMmap->base && this->countMmap->base != MAP_FAILED) { munmap(this->countMmap->base, COUNT_PAGE_SIZE); diff --git a/pmu/perf_counter_default.h b/pmu/perf_counter_default.h new file mode 100644 index 0000000000000000000000000000000000000000..ac105858942e2e707fa6013fe203557cd2e66f28 --- /dev/null +++ b/pmu/perf_counter_default.h @@ -0,0 +1,77 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Gan + * Create: 2024-04-03 + * Description: declaration of class PerfDefaultCounter that inherits from PerfCounter and provides implementations + * for initializing, reading, and mapping performance counter attributes in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_COUNTER_DEFAULT_H +#define PMU_COUNTER_DEFAULT_H + +#include +#include +#include +#include "evt.h" +#include "pmu_event.h" +#include "perf_counter.h" + +#define REQUEST_USER_ACCESS 0x2 + +struct ReadFormat { + __u64 value; + __u64 timeEnabled; + __u64 timeRunning; + __u64 id; +}; + +namespace KUNPENG_PMU { + static constexpr int COUNT_PAGE_SIZE = 4096; + class PerfDefaultCounter : public PerfCounter { + public: + using PerfCounter::PerfCounter; + ~PerfDefaultCounter() + {} + int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; + int Read(EventData &eventData) override; + int MapPerfAttr(const bool groupEnable, const int groupFd) override; + int Enable() override; + int Disable() override; + int Reset() override; + int Close() override; + + private: + enum class GroupStatus + { + NO_GROUP, + GROUP_LEADER, + GROUP_MEMBER + }; + int Mmap(); + int MapPerfAttrUserAccess(); + int CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, std::vector &data); + int ReadSingleEvent(std::vector &data); + int ReadGroupEvents(std::vector &data); + + // Accumulated pmu count, time enabled and time running. 
+ __u64 enabled = 0; + __u64 running = 0; + // For group events, is the accum counts of all members. + // For normal events, has only one element. + std::vector<__u64> accumCount; + int groupFd = 0; + GroupStatus groupStatus = GroupStatus::NO_GROUP; + // reg index is stored in countMmap->base + std::shared_ptr countMmap = nullptr; + bool isCollect{false}; + }; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 2c8ef8bd9a08fe3b05203117892ee50e9d0e9447..44a171043a9c0374007817e6f7b513a078722c5c 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -361,9 +361,22 @@ static void CopyAttrData(PmuAttr* newAttr, PmuAttr* inputAttr, enum PmuTaskType strcpy(newEvtList[i], inputAttr->evtList[i]); } } + newAttr->evtList = newEvtList; newAttr->numEvt = inputAttr->numEvt; + char **newCgroupNameList = nullptr; + if (inputAttr->numCgroup > 0) { + newCgroupNameList = new char *[inputAttr->numCgroup]; + for (int i = 0; i < inputAttr->numCgroup; ++i) { + newCgroupNameList[i] = new char[strlen(inputAttr->cgroupNameList[i]) + 1]; + strcpy(newCgroupNameList[i], inputAttr->cgroupNameList[i]); + } + } + + newAttr->cgroupNameList = newCgroupNameList; + newAttr->numCgroup = inputAttr->numCgroup; + // If the event group ID is not enabled, set the groupId to -1. It indicates that the event is not grouped. if ((collectType == SAMPLING || collectType == COUNTING) && inputAttr->evtAttr == nullptr) { struct EvtAttr *evtAttr = new struct EvtAttr[newAttr->numEvt]; @@ -907,10 +920,19 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { - // For counting with pid list for system wide, open fd with cpu -1 and specific pid. 
- taskParam->numCpu = 1; - taskParam->cpuList = new int[taskParam->numCpu]; - taskParam->cpuList[0] = -1; + if(attr->useBpf) { + taskParam->numCpu = MAX_CPU_NUM; + taskParam->cpuList = new int[MAX_CPU_NUM]; + for(int i = 0; i < MAX_CPU_NUM; i++) { + taskParam->cpuList[i] = i; + } + } else { + // For counting with pid list for system wide, open fd with cpu -1 and specific pid. + taskParam->numCpu = 1; + taskParam->cpuList = new int[taskParam->numCpu]; + taskParam->cpuList[0] = -1; + } + } else if (attr->cpuList == nullptr) { // For null cpulist, open fd with cpu 0,1,2...max_cpu const set &onLineCpus = GetOnLineCpuIds(); @@ -997,6 +1019,8 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att if (attr->enableUserAccess) { taskParam->pmuEvt->config1 = REQUEST_USER_ACCESS; } + taskParam->pmuEvt->numEvent = attr->numEvt; + taskParam->pmuEvt->useBpf = attr->useBpf; return taskParam.release(); } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 6a7f8cb5a28cb8244292986b9fba1d792162179f..21268325526b29638564628ce84c817dd5bfed91 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -52,6 +52,8 @@ struct PmuEvt { int cgroupFd; std::string cgroupName; unsigned enableUserAccess : 1; // avoid uncore (config1 & 0x2) == 0x2 + unsigned numEvent; + unsigned useBpf : 1; // bpf mode for counting }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 31b131b7b3b0bfc5f984421388ad42cd08fee96f..94867bcd867042351e39fcb467cfc3380285b4cc 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -87,10 +87,24 @@ namespace KUNPENG_PMU { return err; } fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); - std::shared_ptr evtList = - std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); - evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); - InsertEvtList(pd, evtList); + + #ifdef BPF_ENABLED + if (taskParam->pmuEvt->collectType == 
COUNTING && taskParam->pmuEvt->useBpf) { + std::shared_ptr evtList = + std::make_shared(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); + InsertBpfEvtList(pd, evtList); + } else { + std::shared_ptr evtList = + std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); + evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); + InsertEvtList(pd, evtList); + } + #else + std::shared_ptr evtList = + std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); + evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); + InsertEvtList(pd, evtList); + #endif pmuTaskAttrHead = pmuTaskAttrHead->next; } @@ -100,9 +114,12 @@ return symbolErrNo; } - auto err = CheckRlimit(fdNum); - if (err != SUCCESS) { - return err; + int err; + if (!taskParam->pmuEvt->useBpf) { // in bpf mode, cpuSize * proSize will exceed rlimit + err = CheckRlimit(fdNum); + if (err != SUCCESS) { + return err; + } } err = Init(pd); @@ -115,7 +132,7 @@ return SUCCESS; } - int PmuList::EvtInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList) + int PmuList::EvtDefaultInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList) { auto err = evtList->Init(groupEnable, evtLeader); if (err != SUCCESS) { @@ -130,19 +147,68 @@ return SUCCESS; } +#ifdef BPF_ENABLED + int PmuList::EvtBpfInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList) + { + auto err = evtList->Init(groupEnable, evtLeader); + if (err != SUCCESS) { + return err; + } + return SUCCESS; + } + + void PmuList::InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList) + { + lock_guard lg(pmuListMtx); + pmuBpfList[pd].push_back(evtList); + } + + std::vector>& PmuList::GetBpfEvtList(const unsigned pd) + { + 
lock_guard lg(pmuListMtx); + return pmuBpfList[pd]; + } + + int PmuList::GetBpfTaskType(const int pd) const + { + lock_guard lg(pmuListMtx); + auto findEvtList = pmuBpfList.find(pd); + if (findEvtList == pmuBpfList.end()) { + return -1; + } + if (findEvtList->second.empty()) { + return -1; + } + return findEvtList->second[0]->GetEvtType(); + } +#endif + int PmuList::Init(const int pd) { std::unordered_map eventGroupInfoMap; + + #ifdef BPF_ENABLED + if (GetBpfTaskType(pd) == COUNTING) { + for (auto& evtList : GetBpfEvtList(pd)) { + auto err = EvtBpfInit(false, nullptr, pd, evtList); + if (err != SUCCESS) { + return err; + } + continue; + } + } + #endif + for (auto& evtList : GetEvtList(pd)) { if (evtList->GetGroupId() == -1) { - auto err = EvtInit(false, nullptr, pd, evtList); + auto err = EvtDefaultInit(false, nullptr, pd, evtList); if (err != SUCCESS) { return err; } continue; } if (eventGroupInfoMap.find(evtList->GetGroupId()) == eventGroupInfoMap.end()) { - auto err = EvtInit(true, nullptr, pd, evtList); + auto err = EvtDefaultInit(true, nullptr, pd, evtList); if (err != SUCCESS) { return err; } @@ -168,9 +234,9 @@ namespace KUNPENG_PMU { int err = 0; if (eventGroupInfoMap[evtChild->GetGroupId()].uncoreState == static_cast(UncoreState::HasUncore)) { SetWarn(LIBPERF_WARN_INVALID_GROUP_HAS_UNCORE); - err = EvtInit(false, nullptr, pd, evtChild); + err = EvtDefaultInit(false, nullptr, pd, evtChild); } else { - err = EvtInit(true, eventGroupInfoMap[evtChild->GetGroupId()].evtLeader, pd, evtChild); + err = EvtDefaultInit(true, eventGroupInfoMap[evtChild->GetGroupId()].evtLeader, pd, evtChild); } if (err != SUCCESS) { return err; @@ -186,6 +252,17 @@ namespace KUNPENG_PMU { int PmuList::Start(const int pd) { + #ifdef BPF_ENABLED + if (GetBpfTaskType(pd) == COUNTING) { + auto pmuList = GetBpfEvtList(pd); + for (auto item: pmuList) { + auto err = item->Start(); + if (err != SUCCESS) { + return err; + } + } + } + #endif auto pmuList = GetEvtList(pd); for (auto item: 
pmuList) { auto err = item->Start(); @@ -345,6 +422,18 @@ namespace KUNPENG_PMU { evtData.pd = pd; evtData.collectType = static_cast(GetTaskType(pd)); auto ts = GetCurrentTime(); + #ifdef BPF_ENABLED + if (GetBpfTaskType(pd) == COUNTING) { + auto eventList = GetBpfEvtList(pd); + for (auto item: eventList) { + item->SetTimeStamp(ts); + auto err = item->Read(evtData); + if (err != SUCCESS) { + return err; + } + } + } + #endif auto eventList = GetEvtList(pd); for (auto item: eventList) { item->SetTimeStamp(ts); @@ -353,7 +442,6 @@ namespace KUNPENG_PMU { return err; } } - return SUCCESS; } @@ -507,13 +595,13 @@ namespace KUNPENG_PMU { return findEvtList->second[0]->GetBlockedSample(); } - void PmuList::InsertEvtList(const unsigned pd, std::shared_ptr evtList) + void PmuList::InsertEvtList(const unsigned pd, std::shared_ptr evtList) { lock_guard lg(pmuListMtx); pmuList[pd].push_back(evtList); } - std::vector>& PmuList::GetEvtList(const unsigned pd) + std::vector>& PmuList::GetEvtList(const unsigned pd) { lock_guard lg(pmuListMtx); return pmuList[pd]; @@ -739,8 +827,14 @@ namespace KUNPENG_PMU { } auto& evData = dataList[pd]; - - if (GetTaskType(pd) == COUNTING) { + auto pData = evData.data.data(); + int taskType = GetTaskType(pd); + #ifdef BPF_ENABLED + if(GetBpfTaskType(pd) != -1) { + taskType = GetBpfTaskType(pd); + } + #endif + if (taskType == COUNTING) { std::vector newPmuData; AggregateUncoreData(pd, evData.data, newPmuData); EventData newEvData = { @@ -826,7 +920,7 @@ namespace KUNPENG_PMU { throw runtime_error(""); } - int PmuList::AddToEpollFd(const int pd, const std::shared_ptr& evtList) + int PmuList::AddToEpollFd(const int pd, const std::shared_ptr& evtList) { lock_guard lg(pmuListMtx); // Try to create a epoll fd for current pd. 
diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 0798f0f41abeebc31690ec4968c1463c934bf9ab..a1f03066c2ab566469720563cba3b59ef7ef5b18 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -22,7 +22,10 @@ #include "dummy_event.h" #include "evt_list.h" #include "pmu_event.h" - +#include "evt_list_default.h" +#ifdef BPF_ENABLED + #include "bpf/evt_list_bpf.h" +#endif namespace KUNPENG_PMU { enum AnalysisStatus { @@ -96,14 +99,24 @@ private: PmuList& operator=(const PmuList&) = delete; ~PmuList() = default; - void InsertEvtList(const unsigned pd, std::shared_ptr evtList); - std::vector>& GetEvtList(const unsigned pd); + void InsertEvtList(const unsigned pd, std::shared_ptr evtList); + std::vector>& GetEvtList(const unsigned pd); + void EraseEvtList(const unsigned pd); void EraseParentEventMap(const unsigned pd); void EraseSymModeList(const unsigned pd); void ErasePpidList(const unsigned pd); - int EvtInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList); + int EvtDefaultInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList); + +#ifdef BPF_ENABLED + void InsertBpfEvtList(const unsigned pd, std::shared_ptr evtList); + std::vector>& GetBpfEvtList(const unsigned pd); + int EvtBpfInit(const bool groupEnable, const std::shared_ptr evtLeader, const int pd, const std::shared_ptr &evtList); + std::unordered_map>> pmuBpfList; + int GetBpfTaskType(const int pd) const; +#endif + int Init(const int pd); void InsertDataEvtGroupList(const unsigned pd, groupMapPtr evtGroupList); @@ -118,7 +131,7 @@ private: void FillStackInfo(EventData &eventData); void EraseUserData(PmuData* pmuData); - int AddToEpollFd(const int pd, const std::shared_ptr &evtList); + int AddToEpollFd(const int pd, const std::shared_ptr &evtList); void RemoveEpollFd(const int pd); int GetEpollFd(const int pd); std::vector& GetEpollEvents(const int epollFd); @@ -147,7 +160,8 @@ private: static std::mutex 
dataEvtGroupListMtx; static std::mutex dataParentMtx; static std::mutex analysisStatusMtx; - std::unordered_map>> pmuList; + + std::unordered_map>> pmuList; // Key: pd // Value: PmuData List. // PmuData is stored here before user call . diff --git a/test/test_perf/CMakeLists.txt b/test/test_perf/CMakeLists.txt index 1eb6f63b1e894fb73313f481947747b4b0091ef2..e28f490bb99eb6b64f21186c3d6a0862cf2d3ac5 100644 --- a/test/test_perf/CMakeLists.txt +++ b/test/test_perf/CMakeLists.txt @@ -4,11 +4,12 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/pfm) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/analyzer/metric) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/decoder) +include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/bpf) include_directories(${PROJECT_TOP_DIR}/include) add_compile_options(-g) set(CMAKE_CXX_STANDARD 14) aux_source_directory(. SOURCE_SRC) add_executable(test_perf ${SOURCE_SRC} ${CMAKE_CURRENT_LIST_DIR}/../../util/pcerr.cpp) -target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread -g) +target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread bpf -g) add_subdirectory(case) diff --git a/third_party/libbpf b/third_party/libbpf new file mode 160000 index 0000000000000000000000000000000000000000..da08818f4f3b6a8f6d15617184de9a6c34c5b642 --- /dev/null +++ b/third_party/libbpf @@ -0,0 +1 @@ +Subproject commit da08818f4f3b6a8f6d15617184de9a6c34c5b642 diff --git a/util/common.cpp b/util/common.cpp index a4dde7c124cac838d074f7a97d715655f14c8044..087a6a5a5d5a113a42ba34dd52a00731b50c8402 100644 --- a/util/common.cpp +++ b/util/common.cpp @@ -192,4 +192,15 @@ int GetParanoidVal() } } return INT32_MAX; -} \ No newline at end of file +} + +std::string GetCgroupPath(const std::string& cgroupName) { + std::string cgroupRootPath = "/sys/fs/cgroup/"; + int cgroupIsV2 = CheckCgroupV2(); + if (cgroupIsV2) { + 
cgroupRootPath += cgroupName; + } else if (cgroupIsV2 == 0) { + cgroupRootPath += "perf_event/" + cgroupName; + } + return cgroupRootPath; +} diff --git a/util/common.h b/util/common.h index 3af3c4eec60f3c649a493f80237dce2b0708f8c1..98dec4e58035b050c7f684c5649c3db19fe98bb5 100644 --- a/util/common.h +++ b/util/common.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef __x86_64__ #define IS_X86 1 @@ -57,5 +58,6 @@ bool StartWith(const std::string& str, const std::string& prefix); int CheckCgroupV2(); bool ConvertStrToInt(const std::string& intValStr, int32_t& val); int GetParanoidVal(); +std::string GetCgroupPath(const std::string& cgroupName); #endif // LIBKPROF_COMMON_H diff --git a/util/pcerr.cpp b/util/pcerr.cpp index eba1369f344b90f43dc678a480d4777383209f7e..1b6384f856dc5c4ed906bd5d2c0e083e1d32bee8 100644 --- a/util/pcerr.cpp +++ b/util/pcerr.cpp @@ -64,7 +64,8 @@ namespace pcerr { {LIBPERF_ERR_COUNT_MMAP_IS_NULL, "Count mmap page is null!"}, {LIBPERF_ERR_ENABLE_USER_ACCESS_FAILED, "Enable user access failed!"}, {LIBPERF_ERR_ALLOCATE_REGISTER_FAILED, "Allocate register failed!"}, - {LIBPERF_ERR_CHECK_USER_ACCESS, "Check user access failed!"} + {LIBPERF_ERR_CHECK_USER_ACCESS, "Check user access failed!"}, + {LIBPERF_ERR_BPF_ACT_FAILED, "failed to execute bpf obj act"}, }; static std::unordered_map warnMsgs = { {LIBPERF_WARN_CTXID_LOST, "Some SPE context packets are not found in the traces."},