From 35fc6db5aa31aafe933a4ad1d3f580bca5ff2d63 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 18 Aug 2025 10:52:50 +0800 Subject: [PATCH] add bpf mode files && revise compile --- build/common.sh | 6 +- include/pcerrc.h | 2 + include/pmu.h | 2 + pmu/CMakeLists.txt | 11 +- pmu/bpf/evt_list_bpf.cpp | 166 ++++++ pmu/bpf/evt_list_bpf.h | 61 +++ pmu/bpf/perf_counter_bpf.cpp | 502 ++++++++++++++++++ pmu/bpf/perf_counter_bpf.h | 59 ++ pmu/bpf/sched_counter.bpf.c | 8 +- pmu/evt_list.h | 62 ++- pmu/{evt_list.cpp => evt_list_default.cpp} | 70 +-- pmu/evt_list_default.h | 60 +++ pmu/perf_counter.h | 53 +- ...f_counter.cpp => perf_counter_default.cpp} | 30 +- pmu/perf_counter_default.h | 76 +++ pmu/pmu.cpp | 48 +- pmu/pmu_event.h | 2 + pmu/pmu_list.cpp | 31 +- test/test_perf/CMakeLists.txt | 7 +- util/pcerr.cpp | 4 +- 20 files changed, 1105 insertions(+), 155 deletions(-) create mode 100644 pmu/bpf/evt_list_bpf.cpp create mode 100644 pmu/bpf/evt_list_bpf.h create mode 100644 pmu/bpf/perf_counter_bpf.cpp create mode 100644 pmu/bpf/perf_counter_bpf.h rename pmu/{evt_list.cpp => evt_list_default.cpp} (84%) create mode 100644 pmu/evt_list_default.h rename pmu/{perf_counter.cpp => perf_counter_default.cpp} (93%) create mode 100644 pmu/perf_counter_default.h diff --git a/build/common.sh b/build/common.sh index 0ce407c..c2330e1 100644 --- a/build/common.sh +++ b/build/common.sh @@ -102,8 +102,8 @@ function build_skel_files() { local bpf_file_dir=$1 local bpf_lib_dir=$2 - bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/vmlinux.h" - if [ -s "${bpf_lib_dir}local/bpf/vmlinux.h" ]; then + bpftool btf dump file /sys/kernel/btf/vmlinux format c > "${bpf_lib_dir}local/bpf/usr/include/bpf/vmlinux.h" + if [ -s "${bpf_lib_dir}local/bpf/usr/include/bpf/vmlinux.h" ]; then echo "The kernel header file generated." else echo "Generate vmlinux.h file failed." 
@@ -116,7 +116,7 @@ function build_skel_files() { skel_path="${bpf_file_dir}/${src_name}.skel.h" echo "compile: $src_name" - clang -I${bpf_lib_dir}local -g -O2 -target bpf -c "$bpf_src" -o "$obj_path" + clang -I${bpf_lib_dir}local/bpf/usr/include -g -O2 -target bpf -c "$bpf_src" -o "$obj_path" [ -s "$obj_path" ] || { echo "Error: The obj file was not generated."; exit 1; } bpftool gen skeleton "$obj_path" > "$skel_path" [ -s "$skel_path" ] || { echo "Error: The skeleton file was not generated."; exit 1; } diff --git a/include/pcerrc.h b/include/pcerrc.h index 9d5f4f6..b2a1fdf 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -123,6 +123,8 @@ extern "C" { #define LIBPERF_ERR_ALLOCATE_REGISTER_FAILED 1076 #define LIBPERF_ERR_CHECK_USER_ACCESS 1077 #define LIBPERF_ERR_COUNTER_INDEX_IS_ZERO 1078 +#define LIBPERF_ERR_BPF_ACT_FAILED 1079 +#define LIBPERF_ERR_INVALID_BPF_PARAM 1080 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index 43f1cc1..0361455 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -176,6 +176,8 @@ struct PmuAttr { // enable user access counting for current process unsigned enableUserAccess : 1; + // enable bpf mode for counting + unsigned enableBpf : 1; }; enum PmuTraceType { diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt index 4d7f7cf..d33bf40 100644 --- a/pmu/CMakeLists.txt +++ b/pmu/CMakeLists.txt @@ -36,16 +36,21 @@ if (BPF) message(STATUS "BPF is true. 
building with bpf submodule") add_compile_definitions(BPF_ENABLED) include_directories(${PMU_FILE_DIR}/bpf) - include_directories(${PROJECT_TOP_DIR}/third_party/libbpf) - link_directories(${PROJECT_TOP_DIR}/third_party/local/bpf) + include_directories(${PROJECT_TOP_DIR}/third_party/local/bpf/usr/include) + link_directories(${PROJECT_TOP_DIR}/third_party/local/bpf/usr/lib64) set(BPF_SOURCES ${BPF_SRC}) else () set(BPF_SOURCES "") endif() ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC} ${BPF_SOURCES}) ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC} ${BPF_SOURCES}) + set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf") -target_link_libraries(kperf sym) +if (BPF) + target_link_libraries(kperf sym bpf) +else () + target_link_libraries(kperf sym) +endif() target_compile_options(kperf PRIVATE -fPIC) install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) diff --git a/pmu/bpf/evt_list_bpf.cpp b/pmu/bpf/evt_list_bpf.cpp new file mode 100644 index 0000000..607ebf7 --- /dev/null +++ b/pmu/bpf/evt_list_bpf.cpp @@ -0,0 +1,166 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: implementations for managing and interacting with performance events of EvtListBpf in the KUNPENG_PMU namespace + ******************************************************************************/ +#include +#include +#include +#include "cpu_map.h" +#include "pmu_event.h" +#include "pcerrc.h" +#include "pcerr.h" +#include "log.h" +#include "common.h" +#include "evt_list_bpf.h" + +using namespace std; + +int KUNPENG_PMU::EvtListBpf::Init(const bool groupEnable, const std::shared_ptr evtLeader) +{ + // Init process map. + for (auto& proc: pidList) { + if (proc->tid > 0) { + procMap[proc->tid] = proc; + } + } + + for (unsigned int cpu = 0; cpu < numCpu; cpu++) { + PerfEvtPtr perfEvt = + std::make_shared(this->cpuList[cpu]->coreId, -1, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + int err = 0; + err = perfEvt->Init(groupEnable, -1, -1); + if (err != SUCCESS) { + return err; + } + this->cpuCounterArray.emplace_back(perfEvt); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + PerfEvtPtr perfEvt = + std::make_shared(-1, this->pidList[pid]->tid, this->pmuEvt.get(), procMap); + if (perfEvt == nullptr) { + continue; + } + + perfEvt->Init(groupEnable, -1, -1); // init pid, ignore the result of perf_event_open + this->pidCounterArray.emplace_back(perfEvt); + } + return SUCCESS; +} + +int KUNPENG_PMU::EvtListBpf::CollectorTaskArrayDoTask(std::vector& taskArray, int task) +{ + std::unique_lock lock(mutex); + for (auto evt: taskArray) { + auto err = CollectorDoTask(evt, task); + if (err != SUCCESS) { + return err; + } + } + this->prevStat = this->evtStat; + this->evtStat = task; + return SUCCESS; +} + +int KUNPENG_PMU::EvtListBpf::Start() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, START); +} + +int KUNPENG_PMU::EvtListBpf::Enable() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, ENABLE); +} + +int KUNPENG_PMU::EvtListBpf::Stop() +{ + return 
CollectorTaskArrayDoTask(this->cpuCounterArray, STOP); +} + +int KUNPENG_PMU::EvtListBpf::Reset() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, RESET); +} + +int KUNPENG_PMU::EvtListBpf::Pause() +{ + return CollectorTaskArrayDoTask(this->cpuCounterArray, PAUSE); +} + +int KUNPENG_PMU::EvtListBpf::Close() +{ + auto ret = CollectorTaskArrayDoTask(this->cpuCounterArray, CLOSE); + if (ret != SUCCESS) { + return ret; + } + + procMap.clear(); + return SUCCESS; +} + +int KUNPENG_PMU::EvtListBpf::Read(EventData &eventData) +{ + std::unique_lock lg(mutex); + + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->BeginRead(); + if (err != SUCCESS) { + return err; + } + } + + struct PmuEvtData* head = nullptr; + int row = 0; + auto cpuTopo = this->cpuList[row].get(); + for (unsigned int pid = 0; pid < numPid; pid++) { + auto cnt = eventData.data.size(); + int err = this->pidCounterArray[pid]->Read(eventData); + if (err != SUCCESS) { + return err; + } + if (eventData.data.size() - cnt) { + DBG_PRINT("evt: %s pid: %d cpu: %d samples num: %d\n", pmuEvt->name.c_str(), pidList[pid]->pid, + cpuTopo->coreId, eventData.data.size() - cnt); + } + // Fill event name and cpu topology. 
+ FillFields(cnt, eventData.data.size(), cpuTopo, pidList[pid].get(), eventData.data); + } + + for (unsigned int pid = 0; pid < numPid; pid++) { + int err = this->pidCounterArray[pid]->EndRead(); + if (err != SUCCESS) { + return err; + } + } + + return SUCCESS; +} + +void KUNPENG_PMU::EvtListBpf::FillFields( + size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +{ + for (auto i = start; i < end; ++i) { + data[i].cpuTopo = cpuTopo; + data[i].evt = this->pmuEvt->name.c_str(); + if (data[i].comm == nullptr) { + data[i].comm = procTopo->comm; + } + if (data[i].ts == 0) { + data[i].ts = this->ts; + } + } +} \ No newline at end of file diff --git a/pmu/bpf/evt_list_bpf.h b/pmu/bpf/evt_list_bpf.h new file mode 100644 index 0000000..00afaad --- /dev/null +++ b/pmu/bpf/evt_list_bpf.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: declaration of class EvtListBpf with functions for managing and interacting with a list + * of performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_EVTLISTBPF_H +#define PMU_EVTLISTBPF_H +#include +#include +#include +#include +#include +#include +#include "cpu_map.h" +#include "perf_counter_bpf.h" +#include "perf_counter_default.h" +#include "pmu.h" +#include "process_map.h" +#include "sampler.h" +#include "spe_sampler.h" +#include "evt_list.h" + +namespace KUNPENG_PMU { + +class EvtListBpf : public EvtList { +public: + EvtListBpf(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, + std::shared_ptr pmuEvt, const int groupId) + : EvtList(symbolMode, cpuList, pidList, pmuEvt, groupId){} + + int Init(const bool groupEnable, const std::shared_ptr evtLeader); + int Pause(); + int Close() override; + int Start() override; + int Enable() override; + int Stop() override; + int Reset() override; + int Read(EventData &eventData) override; + + void SetGroupInfo(const EventGroupInfo &grpInfo) override {}; + void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) override {}; + +private: + std::vector> cpuCounterArray; + std::vector> pidCounterArray; + int CollectorTaskArrayDoTask(std::vector& taskArray, int task); + void FillFields(size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, std::vector& pmuData); +}; + +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/bpf/perf_counter_bpf.cpp b/pmu/bpf/perf_counter_bpf.cpp new file mode 100644 index 0000000..bfe2026 --- /dev/null +++ b/pmu/bpf/perf_counter_bpf.cpp @@ -0,0 +1,502 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Wu + * Create: 2025-08-10 + * Description: implementations for reading performance counters and initializing counting logic + * of PerfCounterBpf in the KUNPENG_PMU namespace. + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pmu.h" +#include "linked_list.h" +#include "pfm_event.h" +#include "pmu_event.h" +#include "pcerr.h" +#include "log.h" +#include "sched_counter.skel.h" +#include "sched_cgroup.skel.h" +#include "perf_counter_bpf.h" + +using namespace std; +using namespace pcerr; + +#define MAX_ENTITES 102400 + +static map counterMap; // key: evt name, value: bpf obj +static struct sched_cgroup_bpf *cgrpCounter = nullptr; // one bpf obj in cgroup mode +static std::unordered_map evtDataMap; +static set evtKeys; // updated fds of cgroup +static set readCgroups; +static set triggerdEvt; +static int evtIdx = 0; +static int cgrpProgFd = 0; + +static inline int TriggeredRead(int prog_fd, int cpu) +{ + // enforce the bpf trace function + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, // no input context + .ctx_size_in = 0, + .retval = 0, // return code of the BPF program + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + ); + return bpf_prog_test_run_opts(prog_fd, &opts); +} + +int KUNPENG_PMU::PerfCounterBpf::BeginRead() +{ + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::EndRead() +{ + triggerdEvt.clear(); + readCgroups.clear(); + 
return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::ReadBpfProcess(std::vector &data) +{ + const unsigned cpuNums = MAX_CPU_NUM; + auto obj = counterMap[this->evt->name]; + + // must execute sched_switch when each read operation. + // the pid may not have been scheduled for a long time and the pmu count will not be recoreded. + if (triggerdEvt.find(this->evt->name) == triggerdEvt.end()) { + for (int i = 0; i < cpuNums; i++) { + int triggerErr = TriggeredRead(evtDataMap[this->evt->name].bpfFd, i); + if (triggerErr) { + DBG_PRINT("trigger error: %s\n", strerror(-triggerErr)); + } + } + triggerdEvt.insert(this->evt->name); + } + + // read the pmu count of this pid in each cpu core + struct bpf_perf_event_value values[cpuNums]; + + int err = bpf_map__lookup_elem( + obj->maps.accum_readings, &this->pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to lookup counter map accum_readings. Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + // convert pmu count to PmuData + int processId = 0; + auto findProc = procMap.find(this->pid); + if (findProc != procMap.end()) { + processId = findProc->second->pid; + } + + for (int i = 0; i < cpuNums; i++) { + data.emplace_back(PmuData{0}); + auto ¤t = data.back(); + current.count = values[i].counter; + current.countPercent = values[i].running / values[i].enabled; + current.cpu = i; + current.tid = this->pid; + current.pid = processId; + } + + // reset pmu count in bpf to ensure that the value read from pmu is delta (after last read/open) + memset(values, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value)); + err = bpf_map__update_elem( + obj->maps.accum_readings, &pid, sizeof(__u32), values, sizeof(bpf_perf_event_value) * cpuNums, BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map accum_readings. 
Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::ReadBpfCgroup(std::vector &data) +{ + auto cgrpName = this->evt->cgroupName; + if (readCgroups.find(cgrpName) != readCgroups.end()) { + return SUCCESS; + } + readCgroups.insert(cgrpName); + + for (int i=0;ievt->name].eventId; + int err = bpf_map__lookup_elem(cgrpCounter->maps.cgrp_readings, &readKey, sizeof(__u32), values, sizeof(values), BPF_ANY); + if (err) { + string msg = + "failed to lookup cgroup map cgrp_readings. Error: " + string(strerror(-err)) + " pid " + to_string(this->pid); + New(LIBPERF_ERR_BPF_ACT_FAILED, msg); + return SUCCESS; + } + + for (int i = 0; i < cpuNums; i++) { + data.emplace_back(PmuData{0}); + auto ¤t = data.back(); + current.count = values[i].counter; + current.countPercent = values[i].running / values[i].enabled; + current.cpu = i; + current.tid = this->pid; + current.cgroupName = this->evt->cgroupName.c_str(); + } + + memset(values, 0, cpuNums * sizeof(bpf_perf_event_value)); + err = bpf_map__update_elem(cgrpCounter->maps.cgrp_readings, &readKey, sizeof(__u32), values, sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup map cgrp_readings. 
Error: " + + string(strerror(-err)) + " pid " + to_string(this->pid)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Read(EventData &eventData) +{ + if (!evt->cgroupName.empty()) { + return ReadBpfCgroup(eventData.data); + } else { + return ReadBpfProcess(eventData.data); + } +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +int KUNPENG_PMU::PerfCounterBpf::InitPidForEvent() +{ + if (this->pid == -1) { + return SUCCESS; + } + + if (evtDataMap[this->evt->name].pids.find(this->pid) != evtDataMap[this->evt->name].pids.end()) { + return SUCCESS; + } + + auto findObj = counterMap.find(this->evt->name); + if (findObj == counterMap.end()) { + return -1; + } + + // initialize the cumulative pmu count for this pid + struct bpf_perf_event_value evtVal[MAX_CPU_NUM]; + + memset(evtVal, 0, MAX_CPU_NUM * sizeof(bpf_perf_event_value)); + int err = bpf_map__update_elem(findObj->second->maps.accum_readings, &pid, sizeof(__u32), evtVal, + sizeof(bpf_perf_event_value) * MAX_CPU_NUM, BPF_NOEXIST); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map accum_readings. Error: " + err); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + // initialize the filter, build the map relationship of pid and accum_key + err = bpf_map__update_elem(findObj->second->maps.filter, &pid, sizeof(__u32), &pid, sizeof(__u32), BPF_NOEXIST); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map filter. 
Error: " + err); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + DBG_PRINT("InitPidForEvent: %d\n", pid); + evtDataMap[this->evt->name].pids.insert(this->pid); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::InitBpfObj() +{ + int err; + struct sched_counter_bpf *obj; + auto findObj = counterMap.find(evt->name); + if (findObj == counterMap.end()) { + // initialize the bpf obj + obj = sched_counter_bpf__open(); + if (!obj) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to open counter bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.events, MAX_CPU_NUM); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: events"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.prev_readings, 1); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: prev_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.accum_readings, 1024); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: accum_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + err = bpf_map__set_max_entries(obj->maps.filter, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of counter map: filter"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_counter_bpf__load(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to load counter bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_counter_bpf__attach(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to attach counter bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + counterMap[this->evt->name] = obj; + err = InitPidForEvent(); + if (err == LIBPERF_ERR_BPF_ACT_FAILED) { + return err; + } + // get the fd of bpf prog, trigger trace function(sched_switch) of bpf in read + int progFd = bpf_program__fd(obj->progs.on_switch); + + 
evtDataMap[this->evt->name].bpfFd = progFd; + DBG_PRINT("create bpf obj for evt %s prog fd %d\n", evt->name.c_str(), progFd); + } else { + obj = counterMap[this->evt->name]; + } + + // initialize the pmu count, put fd of pmu into value + err = bpf_map__update_elem(obj->maps.events, &this->cpu, sizeof(__u32), &this->fd, sizeof(int), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update counter map events. Error: " + + string(strerror(-err)) + "cpu " + to_string(cpu) + "fd " + to_string(fd)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + evtDataMap[this->evt->name].cpus.insert(this->cpu); + return SUCCESS; +} + +static uint64_t ReadCgroupId(const string &cgroupName) +{ + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; + struct { + struct file_handle fh; + uint64_t cgroup_id; + } handle; + int mount_id; + std::string fullCgroupPath = "/sys/fs/cgroup/"; + int cgroupIsV2 = CheckCgroupV2(); + if (cgroupIsV2) { + fullCgroupPath += cgroupName; + } else if (cgroupIsV2 == 0) { + fullCgroupPath += "perf_event/" + cgroupName; + } + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, fullCgroupPath.c_str(), &handle.fh, &mount_id, 0) < 0) { + return -1; + } + + return handle.cgroup_id; +} + +int KUNPENG_PMU::PerfCounterBpf::InitBpfCgroupObj() +{ + int err; + struct sched_cgroup_bpf *obj; + if (cgrpCounter == nullptr) { + obj = sched_cgroup_bpf__open(); + if(!obj){ + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to open cgroup bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + obj->rodata->num_cpus = MAX_CPU_NUM; + obj->rodata->num_events = this->evt->numEvent; + + err = bpf_map__set_max_entries(obj->maps.events, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: events"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = bpf_map__set_max_entries(obj->maps.prev_readings, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: 
prev_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = bpf_map__set_max_entries(obj->maps.cgrp_idx, MAX_ENTITES * 100); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: cgrp_idx"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = bpf_map__set_max_entries(obj->maps.cgrp_readings, MAX_ENTITES); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to set max entries of cgroup map: cgrp_readings"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_cgroup_bpf__load(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to load cgroup bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + err = sched_cgroup_bpf__attach(obj); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to attach cgroup bpf obj"); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + + cgrpProgFd = bpf_program__fd(obj->progs.trigger_read); + cgrpCounter = obj; + DBG_PRINT("create bpf obj for cgroup evt %s \n", evt->name.c_str()); + } + + auto findEvtIdx = evtDataMap.find(this->evt->name); + if (findEvtIdx == evtDataMap.end()) { + evtDataMap[this->evt->name].eventId = evtIdx; + evtIdx++; + } + int evtKey = evtDataMap[this->evt->name].eventId * MAX_CPU_NUM + cpu; + if (evtKeys.find(evtKey) == evtKeys.end()) { + err = bpf_map__update_elem(cgrpCounter->maps.events, &evtKey, sizeof(__u32), + &this->fd, sizeof(int), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup map events. 
Error: " + + string(strerror(-err)) + "cpu " + to_string(cpu) + "fd " + to_string(fd)); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + evtKeys.insert(evtKey); + } + + string cgrpName = this->evt->cgroupName; + auto findCgrp = cgroupIdxMap.find(cgrpName); + if (findCgrp == cgroupIdxMap.end()) { + uint64_t cgrpId = ReadCgroupId(cgrpName); + if (cgrpId == UINT64_MAX) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to get cgroup id of: " + cgrpName); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + int cgrpIdx = cgroupIdxMap.size(); + err = bpf_map__update_elem(cgrpCounter->maps.cgrp_idx, &cgrpId, sizeof(__u64), &cgrpIdx, sizeof(__u32), BPF_ANY); + if (err) { + New(LIBPERF_ERR_BPF_ACT_FAILED, "failed to update cgroup id: " + cgrpId); + return LIBPERF_ERR_BPF_ACT_FAILED; + } + DBG_PRINT("init cgroup bpf map: %s id: %d\n", cgrpName.c_str(), cgrpId); + cgroupIdxMap[cgrpName] = cgrpIdx; + } + + evtDataMap[this->evt->name].cpus.insert(this->cpu); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +{ + int err = InitPidForEvent(); + if (err == LIBPERF_ERR_BPF_ACT_FAILED) { + return err; + } + auto findCpuMap = evtDataMap.find(this->evt->name); + auto findCgroup = cgroupIdxMap.find(this->evt->cgroupName); + if (findCpuMap != evtDataMap.end() && findCpuMap->second.cpus.count(this->cpu) && findCgroup != cgroupIdxMap.end()) { + return SUCCESS; + } + + if (findCpuMap == evtDataMap.end() || !findCpuMap->second.cpus.count(this->cpu)) { + err = this->MapPerfAttr(groupEnable, groupFd); + if (err != SUCCESS) { + return err; + } + } + + if (this->evt->cgroupName.empty()) { + err = InitBpfObj(); + } else { + err = InitBpfCgroupObj(); + } + return err; +} + +int KUNPENG_PMU::PerfCounterBpf::MapPerfAttr(const bool groupEnable, const int groupFd) +{ + struct perf_event_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = this->evt->type; + attr.config = this->evt->config; + 
attr.config1 = this->evt->config1; + attr.config2 = this->evt->config2; + attr.disabled = 1; + + // support cgroup feature + unsigned flags = 0; + if (this->GetCgroupFd() != -1) { + flags = PERF_FLAG_PID_CGROUP | PERF_FLAG_FD_CLOEXEC; + this->pid = this->GetCgroupFd(); + } + + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); + DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", + attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); + if (__glibc_unlikely(this->fd < 0)) { + return MapErrno(errno); + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Enable() +{ + int err = PerfEvt::Enable(); + if (err != SUCCESS) { + return err; + } + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounterBpf::Disable() +{ + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounterBpf::Reset() +{ + return PerfEvt::Reset(); +} + +int KUNPENG_PMU::PerfCounterBpf::Close() +{ + if (this->fd > 0) { + close(this->fd); + } + return SUCCESS; +} \ No newline at end of file diff --git a/pmu/bpf/perf_counter_bpf.h b/pmu/bpf/perf_counter_bpf.h new file mode 100644 index 0000000..1cf1c23 --- /dev/null +++ b/pmu/bpf/perf_counter_bpf.h @@ -0,0 +1,59 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: Wu + * Create: 2025-08-10 + * Description: declaration of class PerfCounterBpf that inherits from PerfCounter. + ******************************************************************************/ +#ifndef PMU_COUNTER_BPF_H +#define PMU_COUNTER_BPF_H + +#include +#include +#include +#include "evt.h" +#include "pmu_event.h" +#include "perf_counter.h" + +#define AT_FDCWD -100 + +struct BpfEvent { + int bpfFd = -1; + int eventId = -1; + std::set cpus; + std::set pids; +}; + +namespace KUNPENG_PMU { + class PerfCounterBpf : public PerfCounter { + public: + using PerfCounter::PerfCounter; + ~PerfCounterBpf() + {} + int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; + int Read(EventData &eventData) override; + int MapPerfAttr(const bool groupEnable, const int groupFd) override; + int Enable() override; + int Disable() override; + int Reset() override; + int Close() override; + + int BeginRead(); + int EndRead(); + private: + int InitBpfObj(); + int InitBpfCgroupObj(); + int InitPidForEvent(); + int ReadBpfProcess(std::vector &data); + int ReadBpfCgroup(std::vector &data); + std::map cgroupIdxMap; // key: cgroup name, value: sequential number + }; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/bpf/sched_counter.bpf.c b/pmu/bpf/sched_counter.bpf.c index 26e9ed1..f213676 100644 --- a/pmu/bpf/sched_counter.bpf.c +++ b/pmu/bpf/sched_counter.bpf.c @@ -56,7 +56,8 @@ struct { } filter SEC(".maps"); SEC("raw_tp/sched_switch") -int BPF_PROG(on_switch) { +int BPF_PROG(on_switch) +{ __u32 pid; __u32 zero=0; __u32 *accum_key; @@ -99,7 +100,8 @@ int BPF_PROG(on_switch) { } SEC("tp_btf/task_newtask") -int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){ +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ long err; __u32 new_pid; __u32 parent_pid; @@ -115,6 +117,6 @@ int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags){ } bpf_map_update_elem(&filter, &new_pid, accum_key, 
BPF_NOEXIST); - bpf_printk("new pid: %d parent: %d add child: %ld accum_key: %ld\n", new_pid, parent_pid, new_pid, *accum_key); + bpf_printk("new pid: %d parent: %d accum_key: %ld\n", new_pid, parent_pid, *accum_key); return 0; } \ No newline at end of file diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 50cc522..4fb9de3 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -22,7 +22,7 @@ #include #include #include "cpu_map.h" -#include "perf_counter.h" +#include "perf_counter_default.h" #include "pmu.h" #include "process_map.h" #include "sampler.h" @@ -63,16 +63,15 @@ public: this->prevStat = OPEN; this->evtStat = OPEN; } - int Init(const bool groupEnable, const std::shared_ptr evtLeader); - int Pause(); - int Close(); - int Start(); - int Enable(); - int Stop(); - int Reset(); - int Read(EventData &eventData); - - void SetGroupInfo(const EventGroupInfo &grpInfo); + virtual ~EvtList() = default; + virtual int Init(const bool groupEnable, const std::shared_ptr evtLeader) = 0; + virtual int Pause() = 0; + virtual int Close() = 0; + virtual int Start() = 0; + virtual int Enable() = 0; + virtual int Stop() = 0; + virtual int Reset() = 0; + virtual int Read(EventData &eventData) = 0; void SetTimeStamp(const int64_t& timestamp) { @@ -109,37 +108,48 @@ public: return pmuEvt->blockedSample; } - void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader); - void ClearExitFd(); -private: - using PerfEvtPtr = std::shared_ptr; - - int CollectorDoTask(PerfEvtPtr collector, int task); - int CollectorXYArrayDoTask(std::vector>& xyArray, int task); - void FillFields(const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, - std::vector& pmuData); - void AdaptErrInfo(int err, PerfEvtPtr perfEvt); + virtual void SetGroupInfo(const EventGroupInfo &grpInfo) = 0; + virtual void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) = 0; +protected: + using PerfEvtPtr = std::shared_ptr; std::vector 
cpuList; std::vector pidList; std::vector unUsedPidList; std::set noProcList; std::shared_ptr pmuEvt; int groupId; // event group id - std::vector>> xyCounterArray; - std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); unsigned int numCpu = 0; unsigned int numPid = 0; std::set fdList; int64_t ts = 0; + std::vector>> xyCounterArray; std::unordered_map procMap; SymbolMode symMode = NO_SYMBOL_RESOLVE; unsigned long branchSampleFilter = KPERF_NO_BRANCH_SAMPLE; int prevStat; int evtStat; std::mutex mutex; - // Fixme: decouple group event with normal event, use different classes to implement Read and Init. - std::unique_ptr groupInfo = nullptr; + + int CollectorDoTask(PerfEvtPtr collector, int task) + { + switch (task) { + case START: + return collector->Start(); + case PAUSE: + return collector->Pause(); + case DISABLE: + return collector->Disable(); + case ENABLE: + return collector->Enable(); + case RESET: + return collector->Reset(); + case CLOSE: + return collector->Close(); + default: + return UNKNOWN_ERROR; + } + } }; struct EventGroupInfo { @@ -160,4 +170,4 @@ struct EventGroupInfo { using groupMapPtr = std::shared_ptr>; } // namespace KUNPENG_PMU -#endif +#endif \ No newline at end of file diff --git a/pmu/evt_list.cpp b/pmu/evt_list_default.cpp similarity index 84% rename from pmu/evt_list.cpp rename to pmu/evt_list_default.cpp index 7e93c0d..6f96e88 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list_default.cpp @@ -21,36 +21,11 @@ #include "pcerr.h" #include "log.h" #include "common.h" -#include "evt_list.h" +#include "evt_list_default.h" using namespace std; -int KUNPENG_PMU::EvtList::CollectorDoTask(PerfEvtPtr collector, int task) -{ - switch (task) { - case START: - return collector->Start(); - case PAUSE: - return collector->Pause(); - case DISABLE: - return collector->Disable(); - case ENABLE: - return collector->Enable(); - case RESET: - return collector->Reset(); - case CLOSE: { - auto ret = collector->Close(); - if (ret == SUCCESS) { - 
fdList.erase(collector->GetFd()); - } - return ret; - } - default: - return UNKNOWN_ERROR; - } -} - -int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector>& xyArray, int task) +int KUNPENG_PMU::EvtListDefault::CollectorXYArrayDoTask(std::vector>& xyArray, int task) { std::unique_lock lock(mutex); for (auto row: xyArray) { @@ -66,7 +41,7 @@ int KUNPENG_PMU::EvtList::CollectorXYArrayDoTask(std::vector evtLeader) +int KUNPENG_PMU::EvtListDefault::Init(const bool groupEnable, const std::shared_ptr evtLeader) { // Init process map. for (auto& proc: pidList) { @@ -127,7 +102,8 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrSetSymbolMode(symMode); perfEvt->SetBranchSampleFilter(branchSampleFilter); - int groupFd = groupEnable && evtLeader ? evtLeader->xyCounterArray[row][col]->GetFd():-1; + auto evtleaderDefault = std::dynamic_pointer_cast(evtLeader); + int groupFd = groupEnable && evtleaderDefault ? evtleaderDefault->xyCounterArray[row][col]->GetFd():-1; int err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); if (err == LIBPERF_ERR_NO_PERMISSION && !this->pmuEvt->excludeKernel && !this->pmuEvt->excludeUser && GetParanoidVal() > 1) { perfEvt->SetNeedTryExcludeKernel(true); @@ -157,22 +133,22 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrxyCounterArray, START); } -int KUNPENG_PMU::EvtList::Enable() +int KUNPENG_PMU::EvtListDefault::Enable() { return CollectorXYArrayDoTask(this->xyCounterArray, ENABLE); } -int KUNPENG_PMU::EvtList::Stop() +int KUNPENG_PMU::EvtListDefault::Stop() { return CollectorXYArrayDoTask(this->xyCounterArray, STOP); } -int KUNPENG_PMU::EvtList::Close() +int KUNPENG_PMU::EvtListDefault::Close() { auto ret = CollectorXYArrayDoTask(this->xyCounterArray, CLOSE); if (ret != SUCCESS) { @@ -183,20 +159,21 @@ int KUNPENG_PMU::EvtList::Close() return SUCCESS; } -int KUNPENG_PMU::EvtList::Reset() +int KUNPENG_PMU::EvtListDefault::Reset() { return 
CollectorXYArrayDoTask(this->xyCounterArray, RESET); } -void KUNPENG_PMU::EvtList::FillFields( - const size_t& start, const size_t& end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) +void KUNPENG_PMU::EvtListDefault::FillFields( + size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, vector& data) { for (auto i = start; i < end; ++i) { data[i].cpuTopo = cpuTopo; if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { // For group events, PmuData are all read by event leader, // and then some PmuData elements should be related to group members. - data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + std::shared_ptr child = std::dynamic_pointer_cast(groupInfo->evtGroupChildList[i-start-1]); + data[i].evt = child->pmuEvt->name.c_str(); } else { // For no group events or group leader. data[i].evt = this->pmuEvt->name.c_str(); @@ -211,7 +188,7 @@ void KUNPENG_PMU::EvtList::FillFields( } } -int KUNPENG_PMU::EvtList::Read(EventData &eventData) +int KUNPENG_PMU::EvtListDefault::Read(EventData &eventData) { std::unique_lock lg(mutex); @@ -256,16 +233,16 @@ int KUNPENG_PMU::EvtList::Read(EventData &eventData) return SUCCESS; } -int KUNPENG_PMU::EvtList::Pause() +int KUNPENG_PMU::EvtListDefault::Pause() { return CollectorXYArrayDoTask(this->xyCounterArray, PAUSE); } -std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) +std::shared_ptr KUNPENG_PMU::EvtListDefault::MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent) { switch (pmuEvent->collectType) { case (COUNTING): - return std::make_shared(cpu, pid, pmuEvent, procMap); + return std::make_shared(cpu, pid, pmuEvent, procMap); case (SAMPLING): return std::make_shared(cpu, pid, pmuEvent, procMap); case (SPE_SAMPLING): @@ -275,7 +252,7 @@ std::shared_ptr KUNPENG_PMU::EvtList::MapPmuAttr(int cpu, }; } -void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) +void 
KUNPENG_PMU::EvtListDefault::AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) { if (pid <= 0 || evtStat == CLOSE || evtStat == STOP) { return; @@ -300,7 +277,8 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons int err = 0; if (groupEnable) { int sz = this->pidList.size(); - auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + std::shared_ptr evtLeaderDefault = std::dynamic_pointer_cast(evtLeader); + auto groupFd = evtLeaderDefault?evtLeaderDefault->xyCounterArray[row][sz - 1]->GetFd():-1; err = perfEvt->Init(groupEnable, groupFd, -1); } else { err = perfEvt->Init(groupEnable, -1, -1); @@ -339,7 +317,7 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons } } -void KUNPENG_PMU::EvtList::ClearExitFd() +void KUNPENG_PMU::EvtListDefault::ClearExitFd() { if (this->pidList.size() == 1 && this->pidList[0]->tid == -1) { return; @@ -390,7 +368,7 @@ void KUNPENG_PMU::EvtList::ClearExitFd() noProcList.clear(); } -void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +void KUNPENG_PMU::EvtListDefault::SetGroupInfo(const EventGroupInfo &grpInfo) { this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); } \ No newline at end of file diff --git a/pmu/evt_list_default.h b/pmu/evt_list_default.h new file mode 100644 index 0000000..bc4d5d2 --- /dev/null +++ b/pmu/evt_list_default.h @@ -0,0 +1,60 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Zhang + * Create: 2024-04-03 + * Description: declaration of class EvtListDefault with functions for managing and interacting with a list + * of performance events in the KUNPENG_PMU namespace + ******************************************************************************/ +#ifndef PMU_EVTLISTDEFAULT_H +#define PMU_EVTLISTDEFAULT_H +#include +#include +#include +#include +#include +#include +#include "cpu_map.h" +#include "perf_counter_default.h" +#include "pmu.h" +#include "process_map.h" +#include "sampler.h" +#include "spe_sampler.h" +#include "evt_list.h" + +namespace KUNPENG_PMU { + +class EvtListDefault : public EvtList { +public: + EvtListDefault(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, + std::shared_ptr pmuEvt, const int groupId) + : EvtList(symbolMode, cpuList, pidList, pmuEvt, groupId){} + int Init(const bool groupEnable, const std::shared_ptr evtLeader); + int Pause(); + int Close() override; + int Start() override; + int Enable() override; + int Stop() override; + int Reset() override; + int Read(EventData &eventData) override; + + void SetGroupInfo(const EventGroupInfo &grpInfo) override; + void AddNewProcess(pid_t pid, const bool groupEnable, const std::shared_ptr evtLeader) override; + void ClearExitFd(); +private: + int CollectorXYArrayDoTask(std::vector>& xyArray, int task); + void FillFields(size_t start, size_t end, CpuTopology* cpuTopo, ProcTopology* procTopo, std::vector& pmuData); + void AdaptErrInfo(int err, PerfEvtPtr perfEvt); + std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); + // Fixme: decouple group event with normal event, use 
different classes to implement Read and Init. + std::unique_ptr groupInfo = nullptr; +}; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index cdb6ddd..fe97363 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -23,55 +23,18 @@ #include "evt.h" #include "pmu_event.h" -#define REQUEST_USER_ACCESS 0x2 - -struct ReadFormat { - __u64 value; - __u64 timeEnabled; - __u64 timeRunning; - __u64 id; -}; - namespace KUNPENG_PMU { - static constexpr int COUNT_PAGE_SIZE = 4096; class PerfCounter : public PerfEvt { public: using PerfEvt::PerfEvt; - ~PerfCounter() - {} - int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; - int Read(EventData &eventData) override; - int MapPerfAttr(const bool groupEnable, const int groupFd) override; - int Enable() override; - int Disable() override; - int Reset() override; - int Close() override; - - private: - enum class GroupStatus - { - NO_GROUP, - GROUP_LEADER, - GROUP_MEMBER - }; - int Mmap(); - int MapPerfAttrUserAccess(); - int CountValueToData(const __u64 value, const __u64 timeEnabled, - const __u64 timeRunning, __u64 &accumCount, std::vector &data); - int ReadSingleEvent(std::vector &data); - int ReadGroupEvents(std::vector &data); - - // Accumulated pmu count, time enabled and time running. - __u64 enabled = 0; - __u64 running = 0; - // For group events, is the accum counts of all members. - // For normal events, has only one element. 
- std::vector<__u64> accumCount; - int groupFd = 0; - GroupStatus groupStatus = GroupStatus::NO_GROUP; - // reg index is stored in countMmap->base - std::shared_ptr countMmap = nullptr; - bool isCollect{false}; + virtual ~PerfCounter() = default; + virtual int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) = 0; + virtual int Read(EventData &eventData) = 0; + virtual int MapPerfAttr(const bool groupEnable, const int groupFd) =0; + virtual int Enable() = 0; + virtual int Disable() = 0; + virtual int Reset() = 0; + virtual int Close() = 0; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter_default.cpp similarity index 93% rename from pmu/perf_counter.cpp rename to pmu/perf_counter_default.cpp index e3640ff..1ced311 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter_default.cpp @@ -10,8 +10,8 @@ * See the Mulan PSL v2 for more details. * Author: Mr.Gan * Create: 2024-04-03 - * Description: implementations for reading performance counters and initializing counting logic in - * the KUNPENG_PMU namespace. + * Description: implementations for reading performance counters and initializing counting logic + * of PerfCounterDefault in the KUNPENG_PMU namespace. 
******************************************************************************/ #include #include @@ -29,7 +29,7 @@ #include "pmu_event.h" #include "pcerr.h" #include "log.h" -#include "perf_counter.h" +#include "perf_counter_default.h" #include "read_reg.h" #include "common.h" @@ -53,7 +53,7 @@ struct GroupReadFormat { * Right now we do not implement grouping logic, thus we ignore the * PERF_FORMAT_ID section for now */ -int KUNPENG_PMU::PerfCounter::Read(EventData &eventData) +int KUNPENG_PMU::PerfCounterDefault::Read(EventData &eventData) { if (__glibc_unlikely(this->fd < 0)) { this->accumCount.clear(); @@ -141,7 +141,7 @@ static int PerfMmapReadSelf(const std::shared_ptr &countMmap, struct R } } // namespace KUNPENG_PMU -int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +int KUNPENG_PMU::PerfCounterDefault::ReadSingleEvent(std::vector &data) { ReadFormat perfCountValue; if (this->evt->enableUserAccess) { @@ -178,7 +178,7 @@ int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) return SUCCESS; } -int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +int KUNPENG_PMU::PerfCounterDefault::ReadGroupEvents(std::vector &data) { // Fixme: // In current class, we do not know how many events in group. 
@@ -219,7 +219,7 @@ int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) return SUCCESS; } -int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 timeEnabled, +int KUNPENG_PMU::PerfCounterDefault::CountValueToData(const __u64 value, const __u64 timeEnabled, const __u64 timeRunning, __u64 &accumCount, vector &data) { if (value < accumCount || timeEnabled < enabled || timeRunning < running) { @@ -262,7 +262,7 @@ int KUNPENG_PMU::PerfCounter::CountValueToData(const __u64 value, const __u64 ti /** * Initialize counting */ -int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) +int KUNPENG_PMU::PerfCounterDefault::Init(const bool groupEnable, const int groupFd, const int resetOutputFd) { int err = SUCCESS; if (this->evt->enableUserAccess) { // user access @@ -277,7 +277,7 @@ int KUNPENG_PMU::PerfCounter::Init(const bool groupEnable, const int groupFd, co return err; } -int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int groupFd) +int KUNPENG_PMU::PerfCounterDefault::MapPerfAttr(const bool groupEnable, const int groupFd) { /** * For now, we only implemented the logic for CORE type events. 
Support for UNCORE PMU events will be @@ -352,7 +352,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou return SUCCESS; } -int KUNPENG_PMU::PerfCounter::MapPerfAttrUserAccess() +int KUNPENG_PMU::PerfCounterDefault::MapPerfAttrUserAccess() { struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); @@ -375,7 +375,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttrUserAccess() return SUCCESS; } -int KUNPENG_PMU::PerfCounter::Mmap() +int KUNPENG_PMU::PerfCounterDefault::Mmap() { this->countMmap = std::make_shared(); this->countMmap->prev = 0; @@ -395,7 +395,7 @@ int KUNPENG_PMU::PerfCounter::Mmap() /** * Enable */ -int KUNPENG_PMU::PerfCounter::Enable() +int KUNPENG_PMU::PerfCounterDefault::Enable() { if (groupFd != -1) { // Only group leader should use ioctl to enable, disable or reset, @@ -416,7 +416,7 @@ int KUNPENG_PMU::PerfCounter::Enable() return SUCCESS; } -int KUNPENG_PMU::PerfCounter::Disable() +int KUNPENG_PMU::PerfCounterDefault::Disable() { if (groupFd != -1) { return SUCCESS; @@ -428,7 +428,7 @@ int KUNPENG_PMU::PerfCounter::Disable() return err; } -int KUNPENG_PMU::PerfCounter::Reset() +int KUNPENG_PMU::PerfCounterDefault::Reset() { if (groupFd != -1) { return SUCCESS; @@ -436,7 +436,7 @@ int KUNPENG_PMU::PerfCounter::Reset() return PerfEvt::Reset(); } -int KUNPENG_PMU::PerfCounter::Close() +int KUNPENG_PMU::PerfCounterDefault::Close() { if (this->countMmap && this->countMmap->base && this->countMmap->base != MAP_FAILED) { munmap(this->countMmap->base, COUNT_PAGE_SIZE); diff --git a/pmu/perf_counter_default.h b/pmu/perf_counter_default.h new file mode 100644 index 0000000..1a84dd7 --- /dev/null +++ b/pmu/perf_counter_default.h @@ -0,0 +1,76 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Gan + * Create: 2024-04-03 + * Description: declaration of class PerfCounterDefault that inherits from PerfCounter. + ******************************************************************************/ +#ifndef PMU_COUNTER_DEFAULT_H +#define PMU_COUNTER_DEFAULT_H + +#include +#include +#include +#include "evt.h" +#include "pmu_event.h" +#include "perf_counter.h" + +#define REQUEST_USER_ACCESS 0x2 + +struct ReadFormat { + __u64 value; + __u64 timeEnabled; + __u64 timeRunning; + __u64 id; +}; + +namespace KUNPENG_PMU { + static constexpr int COUNT_PAGE_SIZE = 4096; + class PerfCounterDefault : public PerfCounter { + public: + using PerfCounter::PerfCounter; + ~PerfCounterDefault() + {} + int Init(const bool groupEnable, const int groupFd, const int resetOutputFd) override; + int Read(EventData &eventData) override; + int MapPerfAttr(const bool groupEnable, const int groupFd) override; + int Enable() override; + int Disable() override; + int Reset() override; + int Close() override; + + private: + enum class GroupStatus + { + NO_GROUP, + GROUP_LEADER, + GROUP_MEMBER + }; + int Mmap(); + int MapPerfAttrUserAccess(); + int CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, std::vector &data); + int ReadSingleEvent(std::vector &data); + int ReadGroupEvents(std::vector &data); + + // Accumulated pmu count, time enabled and time running. + __u64 enabled = 0; + __u64 running = 0; + // For group events, is the accum counts of all members. + // For normal events, has only one element. 
+ std::vector<__u64> accumCount; + int groupFd = 0; + GroupStatus groupStatus = GroupStatus::NO_GROUP; + // reg index is stored in countMmap->base + std::shared_ptr countMmap = nullptr; + bool isCollect{false}; + }; +} // namespace KUNPENG_PMU +#endif diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 2c8ef8b..5dfb9d5 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -293,6 +293,30 @@ static int CheckUserAccess(enum PmuTaskType collectType, struct PmuAttr *attr) return SUCCESS; } +static int CheckBpfMode(enum PmuTaskType collectType, struct PmuAttr *attr) +{ + if (!attr->enableBpf) { + return SUCCESS; + } + #ifndef BPF_ENABLED + New(LIBPERF_ERR_INVALID_BPF_PARAM, "No compilation of 'bpf=true' to support bpf mode"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + #endif + if (collectType != COUNTING) { + New(LIBPERF_ERR_INVALID_BPF_PARAM, "Bpf mode only support counting"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + } + if (attr->cgroupNameList == nullptr && attr->pidList == nullptr) { + New(LIBPERF_ERR_INVALID_BPF_PARAM, "Bpf mode need collect pid or cgroup"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + } + if (attr->evtAttr != nullptr) { + New(LIBPERF_ERR_INVALID_BPF_PARAM, "Bpf mode doesn't support event group now"); + return LIBPERF_ERR_INVALID_BPF_PARAM; + } + return SUCCESS; +} + static int CheckAttr(enum PmuTaskType collectType, struct PmuAttr *attr) { auto err = CheckUserAccess(collectType, attr); @@ -338,6 +362,11 @@ static int CheckAttr(enum PmuTaskType collectType, struct PmuAttr *attr) return err; } + err = CheckBpfMode(collectType, attr); + if (err != SUCCESS) { + New(err); + return err; + } return SUCCESS; } @@ -907,10 +936,19 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { - // For counting with pid list for system wide, open fd with cpu -1 and specific pid. 
- taskParam->numCpu = 1; - taskParam->cpuList = new int[taskParam->numCpu]; - taskParam->cpuList[0] = -1; + if(attr->enableBpf) { + // collect data from all system cores in bpf mode + taskParam->numCpu = MAX_CPU_NUM; + taskParam->cpuList = new int[MAX_CPU_NUM]; + for(int i = 0; i < MAX_CPU_NUM; i++) { + taskParam->cpuList[i] = i; + } + } else { + // For counting with pid list for system wide, open fd with cpu -1 and specific pid. + taskParam->numCpu = 1; + taskParam->cpuList = new int[taskParam->numCpu]; + taskParam->cpuList[0] = -1; + } } else if (attr->cpuList == nullptr) { // For null cpulist, open fd with cpu 0,1,2...max_cpu const set &onLineCpus = GetOnLineCpuIds(); @@ -997,6 +1035,8 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att if (attr->enableUserAccess) { taskParam->pmuEvt->config1 = REQUEST_USER_ACCESS; } + taskParam->pmuEvt->numEvent = attr->numEvt; + taskParam->pmuEvt->enableBpf = attr->enableBpf; return taskParam.release(); } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 6a7f8cb..d8cdd63 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -52,6 +52,8 @@ struct PmuEvt { int cgroupFd; std::string cgroupName; unsigned enableUserAccess : 1; // avoid uncore (config1 & 0x2) == 0x2 + unsigned numEvent; // pmu event number for bpf cgroup init + unsigned enableBpf : 1; // enable bpf mode in counting mode }; namespace KUNPENG_PMU { diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 31b131b..af5cec1 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -28,6 +28,10 @@ #include "pmu_event_list.h" #include "pmu_list.h" #include "pfm_event.h" +#include "evt_list_default.h" +#ifdef BPF_ENABLED + #include "bpf/evt_list_bpf.h" +#endif using namespace std; using namespace pcerr; @@ -87,10 +91,19 @@ namespace KUNPENG_PMU { return err; } fdNum += CalRequireFd(cpuTopoList.size(), procTopoList.size(), taskParam->pmuEvt->collectType); - std::shared_ptr evtList = - std::make_shared(GetSymbolMode(pd), cpuTopoList, 
procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId);
-        evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd));
-        InsertEvtList(pd, evtList);
+        #ifdef BPF_ENABLED
+        if (taskParam->pmuEvt->enableBpf) {
+            std::shared_ptr evtList =
+                std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId);
+            InsertEvtList(pd, evtList);
+        } else
+        #endif
+        {
+            std::shared_ptr evtList =
+                std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId);
+            evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd));
+            InsertEvtList(pd, evtList);
+        }
         pmuTaskAttrHead = pmuTaskAttrHead->next;
     }
@@ -100,9 +113,12 @@
         return symbolErrNo;
     }
-    auto err = CheckRlimit(fdNum);
-    if (err != SUCCESS) {
-        return err;
+    int err;
+    if (!taskParam->pmuEvt->enableBpf) { // in bpf mode, cpuSize * procSize will exceed rlimit
+        err = CheckRlimit(fdNum);
+        if (err != SUCCESS) {
+            return err;
+        }
     }
     err = Init(pd);
@@ -739,7 +755,6 @@
     }
     auto& evData = dataList[pd];
-
     if (GetTaskType(pd) == COUNTING) {
         std::vector newPmuData;
         AggregateUncoreData(pd, evData.data, newPmuData);
diff --git a/test/test_perf/CMakeLists.txt b/test/test_perf/CMakeLists.txt
index 0978ab6..21c3677 100644
--- a/test/test_perf/CMakeLists.txt
+++ b/test/test_perf/CMakeLists.txt
@@ -12,6 +12,11 @@ add_compile_options(-g)
 set(CMAKE_CXX_STANDARD 14)
 aux_source_directory(.
SOURCE_SRC) add_executable(test_perf ${SOURCE_SRC} ${CMAKE_CURRENT_LIST_DIR}/../../util/pcerr.cpp)
-target_link_libraries(test_perf sym kperf gtest m gtest_main elf_static dwarf_static pthread -g)
+
+set(COMMON_LIBS sym kperf gtest m gtest_main elf_static dwarf_static pthread -g)
+if (BPF)
+    set(BPF_LIBS bpf)
+endif()
+target_link_libraries(test_perf ${COMMON_LIBS} ${BPF_LIBS})
 add_subdirectory(case)
diff --git a/util/pcerr.cpp b/util/pcerr.cpp
index eba1369..6d9c53c 100644
--- a/util/pcerr.cpp
+++ b/util/pcerr.cpp
@@ -64,7 +64,9 @@ namespace pcerr {
         {LIBPERF_ERR_COUNT_MMAP_IS_NULL, "Count mmap page is null!"},
         {LIBPERF_ERR_ENABLE_USER_ACCESS_FAILED, "Enable user access failed!"},
         {LIBPERF_ERR_ALLOCATE_REGISTER_FAILED, "Allocate register failed!"},
-        {LIBPERF_ERR_CHECK_USER_ACCESS, "Check user access failed!"}
+        {LIBPERF_ERR_CHECK_USER_ACCESS, "Check user access failed!"},
+        {LIBPERF_ERR_INVALID_BPF_PARAM, "Check bpf mode failed!"},
+        {LIBPERF_ERR_BPF_ACT_FAILED, "Failed to execute bpf obj action!"}
     };
     static std::unordered_map warnMsgs = {
         {LIBPERF_WARN_CTXID_LOST, "Some SPE context packets are not found in the traces."},
-- Gitee