From 77049bde169f71d4ab4be90ed7388315b82b795c Mon Sep 17 00:00:00 2001 From: "lijindong (C)" <2220386943@qq.com> Date: Sat, 29 Nov 2025 17:45:09 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0enable=5Fexec=5Fon=EF=BC=8C?= =?UTF-8?q?=E9=80=82=E9=85=8Dlauch=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Details_Usage.md | 93 ++++++++++++++++++ go/src/libkperf/kperf/kperf.go | 9 ++ include/pcerrc.h | 1 + include/pmu.h | 3 + pmu/evt.h | 1 + pmu/evt_list_default.cpp | 20 +++- pmu/perf_counter_default.cpp | 5 +- pmu/pmu.cpp | 19 ++++ pmu/pmu_event.cpp | 18 ++++ pmu/pmu_event.h | 6 ++ pmu/pmu_list.cpp | 5 + pmu/sampler.cpp | 18 +++- pmu/spe.cpp | 81 ++++++++++++---- pmu/spe.h | 4 +- pmu/spe_sampler.cpp | 2 +- python/modules/_libkperf/Pmu.py | 17 +++- python/modules/_libkperf/Symbol.py | 145 ----------------------------- python/modules/kperf/perror.py | 1 + python/modules/kperf/pmu.py | 4 +- symbol/symbol.h | 12 --- test/test_perf/test_api.cpp | 23 +++++ util/process_map.cpp | 6 ++ util/process_map.h | 14 +++ 23 files changed, 324 insertions(+), 183 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 0baafac..bedac26 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -2047,3 +2047,96 @@ HIP_L1 1952 |——4009cc sum_a(void*)+0x1c8 /home/test/libkperf/example/case/falsesharing_long.c:33 [151] |——400bbc inc_b(void*)+0x1cc /home/test/libkperf/example/case/falsesharing_long.c:59 [114] ``` + +### 通过使能enableExecOn的方式采集进程 +```c++ +#include "pmu.h" +#include "pcerrc.h" +#include "symbol.h" + +#include +#include +#include +#include +#include +#include +#include + +int main() { + std::vector comms; + comms.push_back("ls"); + comms.push_back("-l"); + int fd[2]; + pipe(fd); + pid_t pid = fork(); + if (pid == 0) { + close(fd[1]); + char buf[4]; + int ret = read(fd[0], buf, 4); + if (ret < 1) { + std::cout << "read error" << std::endl; + exit(EXIT_FAILURE); + } + //启动进程数据 + 
char **argv = new char*[comms.size() + 1]; + for (size_t i = 0; i < comms.size(); ++i) { + argv[i] = strdup(comms[i].c_str()); + } + argv[comms.size()] = NULL; + execvp(argv[0], argv); + perror("exec commands failed!"); + for (size_t i = 0; i < comms.size(); ++i) { + free(argv[i]); + } + delete []argv; + exit(EXIT_FAILURE); + } else { + close(fd[0]); + PmuAttr attr = {0}; + int pidList[1] = {pid}; + attr.numPid = 1; + attr.pidList = pidList; + + char* evtList[1] = {"cycles"}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; + attr.period = 4096; + attr.enableOnExec = 1; + + int pd = PmuOpen(SAMPLING, &attr); + if (pd == -1) { + std::cout << Perror() << std::endl; + kill(pid, 9); + return 1; + } else { + std::cout << "pmu open success" << std::endl; + } + + int ret = write(fd[1], "data", 4); + if (ret < 0) { + kill(pid, 9); + std::cout << "pipe write error" << std::endl; + return 1; + } + + PmuData* data = nullptr; + sleep(1); + int len = PmuRead(pd, &data); + for (int i = 0; i < len; i++) { + std::cout << "comm=" << data[i].comm << " " << data[i].ts << " " << data[i].pid << " " << data[i].tid; + + if (data[i].stack) { + auto result = data[i].stack; + if (result->symbol != nullptr) { + Symbol *data = result->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " << data->codeMapAddr << " (" << data->module << ")" << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + } + } else { + std::cout << std::endl; + } + } + kill(pid, 9); + } +} +``` \ No newline at end of file diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index e1d29ab..bd67a8b 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -89,6 +89,10 @@ void SetEnableHwMetric(struct PmuAttr* attr, unsigned enableHwMetric) { attr->enableHwMetric = enableHwMetric; } +void SetEnableOnExec(struct PmuAttr* attr, unsigned enableOnExec) { + 
attr->enableOnExec = enableOnExec; +} + struct PmuData* IPmuRead(int fd, int* len) { struct PmuData* pmuData = NULL; *len = PmuRead(fd, &pmuData); @@ -407,6 +411,7 @@ type PmuAttr struct { EnableUserAccess bool // enable user access counting for current process EnableBpf bool // enable bpf mode for counting EnableHwMetric bool // enable hw metric + EnableOnExec bool // enable enable_on_exec, after PmuOpen is called, if the load is started, enabling enable_on_exec will automatically enable the performance event after the load starts, without the need to call PmuEnable } type CpuTopology struct { @@ -679,6 +684,10 @@ func ToCPmuAttr(attr PmuAttr) (*C.struct_PmuAttr, int) { C.SetEnableHwMetric(cAttr, C.uint(1)) } + if attr.EnableOnExec { + C.SetEnableOnExec(cAttr, C.uint(1)) + } + return cAttr, 0 } diff --git a/include/pcerrc.h b/include/pcerrc.h index 47e797b..853af06 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -132,6 +132,7 @@ extern "C" { #define LIBPERF_ERR_INVALID_MIN_LATENCY 1085 #define LIBPERF_ERR_INVALID_EVT_FILTER 1086 #define LIBPERF_ERR_INVALID_DATA_FILTER 1087 +#define LIBPERF_ERR_NOT_SUPPORT_EXEC_ON 1088 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index 2856a3f..076c40e 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -179,6 +179,9 @@ struct PmuAttr { unsigned enableBpf : 1; // enable hw metric unsigned enableHwMetric : 1; + // enable enable_on_exec + // after PmuOpen is called, if the load is started, enabling enable_on_exec will automatically enable the performance event after the load starts, without the need to call PmuEnable. 
+ unsigned enableOnExec : 1; }; enum PmuTraceType { diff --git a/pmu/evt.h b/pmu/evt.h index f4b0264..2f67d75 100644 --- a/pmu/evt.h +++ b/pmu/evt.h @@ -20,6 +20,7 @@ #include #include #include +#include "process_map.h" #include "common.h" #include "symbol.h" #include "pmu_event.h" diff --git a/pmu/evt_list_default.cpp b/pmu/evt_list_default.cpp index ba43d9b..4535e64 100644 --- a/pmu/evt_list_default.cpp +++ b/pmu/evt_list_default.cpp @@ -188,7 +188,12 @@ void KUNPENG_PMU::EvtListDefault::FillFields( } data[i].groupId = this->groupId; if (data[i].comm == nullptr) { - data[i].comm = procTopo->comm; + // If process has a fork call, it will generate a new pid and add a new comm. + if (data[i].pid > 0 && procMap.find(data[i].pid) != procMap.end()) { + data[i].comm = procMap[data[i].pid]->comm; + } else { + data[i].comm = procTopo->comm; + } } if (data[i].ts == 0) { data[i].ts = this->ts; @@ -229,6 +234,19 @@ int KUNPENG_PMU::EvtListDefault::Read(EventData &eventData) } } + // With enable_on_exec enabled, PmuOpen records its own comm before the workload is launched, which needs to be replaced. 
+ if (this->pmuEvt->enableOnExec) { + for (auto &pmuData : eventData.data) { + if (procMap.find(pmuData.pid) == procMap.end()) { + continue; + } + auto proc = procMap[pmuData.pid]; + if (proc->execComm != nullptr && pmuData.ts >= proc->execTs && pmuData.comm != proc->execComm) { + pmuData.comm = proc->execComm; + } + } + } + for (auto rowList : this->xyCounterArray) { for (auto evt : rowList) { int err = evt->EndRead(); diff --git a/pmu/perf_counter_default.cpp b/pmu/perf_counter_default.cpp index 4a6e5fe..6700f96 100644 --- a/pmu/perf_counter_default.cpp +++ b/pmu/perf_counter_default.cpp @@ -299,6 +299,10 @@ int KUNPENG_PMU::PerfCounterDefault::MapPerfAttr(const bool groupEnable, const i attr.exclude_kernel = this->evt->excludeKernel; attr.exclude_user = this->evt->excludeUser; + if (this->evt->enableOnExec) { + attr.enable_on_exec = 1; + } + /** * if no permission try setting exclude_kernel=1. */ @@ -324,7 +328,6 @@ int KUNPENG_PMU::PerfCounterDefault::MapPerfAttr(const bool groupEnable, const i * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0, * the child events will not start counting until the group leader is enabled. 
*/ - if (groupFd != -1) { attr.disabled = 0; groupStatus = GroupStatus::GROUP_MEMBER; diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 6fd27d4..37e0884 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -382,6 +382,19 @@ static int CheckHwMetric(enum PmuTaskType collectType, struct PmuAttr* attr) { return SUCCESS; } +static int CheckEnableOnExec(struct PmuAttr* attr) { + if (!attr->enableOnExec) { + return SUCCESS; + } + + if (!attr->numPid) { + New(LIBPERF_ERR_NOT_SUPPORT_EXEC_ON, "EnableExecOn can't be enabled without specifying a process"); + return LIBPERF_ERR_NOT_SUPPORT_EXEC_ON; + } + + return SUCCESS; +} + static int CheckAttr(enum PmuTaskType collectType, struct PmuAttr *attr) { auto err = CheckUserAccess(collectType, attr); @@ -438,6 +451,11 @@ static int CheckAttr(enum PmuTaskType collectType, struct PmuAttr *attr) return err; } + err = CheckEnableOnExec(attr); + if (err != SUCCESS) { + return err; + } + return SUCCESS; } @@ -1093,6 +1111,7 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } taskParam->pmuEvt->numEvent = attr->numEvt; taskParam->pmuEvt->enableBpf = attr->enableBpf; + taskParam->pmuEvt->enableOnExec = attr->enableOnExec; return taskParam.release(); } diff --git a/pmu/pmu_event.cpp b/pmu/pmu_event.cpp index 046e4c5..9925ce1 100644 --- a/pmu/pmu_event.cpp +++ b/pmu/pmu_event.cpp @@ -38,4 +38,22 @@ namespace KUNPENG_PMU { return UNKNOWN_ERROR; } } + + /** + * Currently, read timestamp from perf record sample, and this interface can be extended as needed. 
+ */ + struct PerfSampleInfo GetPerfSampleInfo(__u64 sampleType, PerfEvent* event) + { + const __u64* arr = event->sample.array; + arr += ((event->header.size - sizeof(event->header))) / sizeof(__u64) - 1; + __u64 skipList[4] = {PERF_SAMPLE_IDENTIFIER, PERF_SAMPLE_CPU, PERF_SAMPLE_STREAM_ID, PERF_SAMPLE_ID}; + for (int i = 0; i < 4; i++) { + if (sampleType & skipList[i]) { + arr--; + } + } + struct PerfSampleInfo info = {0}; + info.time = *arr; + return info; + } } // namespace KUNPENG_PMU diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 09a4a9c..da31525 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -55,6 +55,7 @@ struct PmuEvt { unsigned numEvent; // pmu event number for bpf cgroup init unsigned enableBpf : 1; // enable bpf mode in counting mode unsigned enableHwMetric : 1; // enable hw_metric=1 in sampling mode + unsigned enableOnExec : 1; // set enable_on_exec = 1 }; namespace KUNPENG_PMU { @@ -73,6 +74,10 @@ struct PerfRawSample { unsigned long ips[]; }; +struct PerfSampleInfo { + __u64 time; +}; + struct TraceRawData { __u32 size; char data[]; @@ -206,6 +211,7 @@ struct EventData { }; int MapErrno(int sysErr); +struct PerfSampleInfo GetPerfSampleInfo(__u64 sampleType, PerfEvent* event); } // namespace KUNPENG_PMU #endif #endif diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index c8d1b1d..95cf503 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -1174,6 +1174,11 @@ namespace KUNPENG_PMU { return SUCCESS; } + // avoid loading the symbols of the process itself + if (taskParam->pmuEvt->enableOnExec) { + return SUCCESS; + } + if (this->symModeList[pd] == RESOLVE_ELF) { for (const auto& pid: pidList) { int rt = SymResolverRecordModuleNoDwarf(pid); diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 865c979..5bd2ebd 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -37,6 +37,8 @@ using namespace std; +static const __u64 SAMPLING_SAMPLE_TYPE = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_ID | 
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_RAW; + int KUNPENG_PMU::PerfSampler::MapPerfAttr(const bool groupEnable, const int groupFd) { struct perf_event_attr attr; @@ -45,8 +47,7 @@ int KUNPENG_PMU::PerfSampler::MapPerfAttr(const bool groupEnable, const int grou attr.config = this->evt->config; attr.config2 = this->evt->config2; attr.size = sizeof(struct perf_event_attr); - attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_ID | - PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_RAW; + attr.sample_type = SAMPLING_SAMPLE_TYPE; // if the branch sample type is not nullptr, set the branch sample type. if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -75,6 +76,11 @@ int KUNPENG_PMU::PerfSampler::MapPerfAttr(const bool groupEnable, const int grou attr.task = 1; attr.sample_id_all = 1; attr.exclude_guest = 1; + + if (this->evt->enableOnExec) { + attr.enable_on_exec = 1; + } + if ((this->evt->blockedSample == 1) && (this->evt->name == "context-switches")) { attr.exclude_kernel = 0; // for confrim the reason of entering off cpu, it need to include kernel. 
attr.context_switch = 1; @@ -152,6 +158,7 @@ void KUNPENG_PMU::PerfSampler::UpdatePidInfo(const int &tid) void KUNPENG_PMU::PerfSampler::UpdateCommInfo(KUNPENG_PMU::PerfEvent *event) { + auto sampleInfo = GetPerfSampleInfo(SAMPLING_SAMPLE_TYPE, event); auto findProc = procMap.find(event->comm.tid); if (findProc == procMap.end()) { std::shared_ptr procTopo(new ProcTopology{0}, FreeProcTopo); @@ -164,6 +171,13 @@ void KUNPENG_PMU::PerfSampler::UpdateCommInfo(KUNPENG_PMU::PerfEvent *event) strcpy(procTopo->comm, event->comm.comm); DBG_PRINT("Add to proc map: %d\n", event->comm.tid); procMap[event->comm.tid] = procTopo; + } else { + findProc->second->execComm = static_cast(malloc(strlen(event->comm.comm) + 1)); + if (findProc->second->execComm == nullptr) { + return; + } + strcpy(findProc->second->execComm, event->comm.comm); + findProc->second->execTs = sampleInfo.time; } } diff --git a/pmu/spe.cpp b/pmu/spe.cpp index ca4f9ed..5f178cf 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -47,7 +47,7 @@ struct AuxContext { size_t auxSize; }; -static int OpenSpeEvent(PmuEvt *pmuAttr, int cpu) +static int OpenSpeEvent(PmuEvt *pmuAttr, int cpu, int pid) { struct perf_event_attr attr = {0}; @@ -66,8 +66,14 @@ static int OpenSpeEvent(PmuEvt *pmuAttr, int cpu) attr.exclude_kernel = pmuAttr->excludeKernel; attr.exclude_user = pmuAttr->excludeUser; + if (pmuAttr->enableOnExec) { + attr.enable_on_exec = 1; + attr.inherit = 1; + } else { + pid = -1; + } + unsigned flags = 0; - pid_t pid = -1; if (!pmuAttr->cgroupName.empty()) { flags = PERF_FLAG_PID_CGROUP | PERF_FLAG_FD_CLOEXEC; pid = pmuAttr->cgroupFd; @@ -75,7 +81,7 @@ static int OpenSpeEvent(PmuEvt *pmuAttr, int cpu) return PerfEventOpen(&attr, pid, cpu, -1, flags); } -static int OpenDummyEvent(int cpu) +static int OpenDummyEvent(PmuEvt *pmuAttr, int cpu, int pid) { struct perf_event_attr attr = {0}; @@ -94,7 +100,14 @@ static int OpenDummyEvent(int cpu) attr.inherit = 1; attr.exclude_guest = 1; - return PerfEventOpen(&attr, -1, 
cpu, -1, 0); + if (pmuAttr->enableOnExec) { + attr.enable_on_exec = 1; + attr.comm = 1; + } else { + pid = -1; + } + + return PerfEventOpen(&attr, pid, cpu, -1, 0); } static int PerfReadTscConversion(const struct perf_event_mmap_page *pc, struct PerfTscConversion *tc) @@ -168,13 +181,13 @@ static int CoreSpeOpenFailed(struct SpeCoreContext **ctx, struct SpeContext *spe return LIBPERF_ERR_SPE_UNAVAIL; } -static int CoreSpeOpen(struct SpeCoreContext **ctx, struct SpeContext *speCtx, PmuEvt *attr, int cpu) +static int CoreSpeOpen(struct SpeCoreContext **ctx, struct SpeContext *speCtx, PmuEvt *attr, int cpu, int pid) { int ret = -1; struct perf_event_mmap_page *mp = nullptr; (*ctx)->cpu = cpu; - (*ctx)->speFd = OpenSpeEvent(attr, cpu); + (*ctx)->speFd = OpenSpeEvent(attr, cpu, pid); if ((*ctx)->speFd < 0) { auto err = MapErrno(errno); ERR_PRINT("failed to open spe\n"); @@ -199,7 +212,7 @@ static int CoreSpeOpen(struct SpeCoreContext **ctx, struct SpeContext *speCtx, P return CoreSpeOpenFailed(ctx, speCtx); } - (*ctx)->dummyFd = OpenDummyEvent(cpu); + (*ctx)->dummyFd = OpenDummyEvent(attr, cpu, pid); if ((*ctx)->dummyFd < 0) { ERR_PRINT("failed to open dummy event fd\n"); return CoreSpeOpenFailed(ctx, speCtx); @@ -214,7 +227,7 @@ static int CoreSpeOpen(struct SpeCoreContext **ctx, struct SpeContext *speCtx, P return SUCCESS; } -int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) +int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx, int pid) { int pageSize = sysconf(_SC_PAGESIZE); @@ -241,7 +254,7 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) ctx->coreCtxes->mask = ctx->auxMmapSize - 1; ctx->coreCtxes->prev = 0; - auto err = CoreSpeOpen(&ctx->coreCtxes, ctx, attr, cpu); + auto err = CoreSpeOpen(&ctx->coreCtxes, ctx, attr, cpu, pid); if (err != 0) { free(ctx->coreCtxes); ctx->coreCtxes = nullptr; @@ -339,6 +352,32 @@ void Spe::UpdateProcMap(__u32 ppid, __u32 pid) } } +void Spe::UpdateCommProcMap(struct KUNPENG_PMU::PerfRecordComm* recordComm) +{ + union 
KUNPENG_PMU::PerfEvent *event = (union KUNPENG_PMU::PerfEvent *)recordComm; + auto sampleInfo = GetPerfSampleInfo(PERF_SAMPLE_TIME, event); + auto findProc = procMap.find(recordComm->tid); + if (findProc == procMap.end()) { + std::shared_ptr procTopo(new ProcTopology{0}, FreeProcTopo); + procTopo->tid = recordComm->tid; + procTopo->pid = recordComm->pid; + procTopo->comm = static_cast(malloc(strlen(recordComm->comm) + 1)); + if (procTopo->comm == nullptr) { + return; + } + strcpy(procTopo->comm, recordComm->comm); + DBG_PRINT("Add to proc map: %d\n", recordComm->tid); + procMap[recordComm->tid] = procTopo; + } else { + findProc->second->execComm = static_cast(malloc(strlen(recordComm->comm) + 1)); + if (findProc->second->execComm == nullptr) { + return; + } + strcpy(findProc->second->execComm, recordComm->comm); + findProc->second->execTs = sampleInfo.time; + } +} + static void ParseContextSwitch(PerfEventSampleContextSwitch *contextSample, ContextSwitchData *data, uint64_t *num, ContextSwitchData *lastSwitchOut) { @@ -400,17 +439,23 @@ int Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData if (header->type == PERF_RECORD_FORK) { struct PerfRecordFork *sample = (struct PerfRecordFork *)header; DBG_PRINT("Fork pid: %d tid: %d\n", sample->pid, sample->tid); - if (sample->pid == sample->tid) { - // A new process is forked and the parent pid is ppid. - UpdateProcMap(sample->ppid, sample->tid); - } else { - // A new thread is created and the parent pid is pid(process id). - UpdateProcMap(sample->pid, sample->tid); - } + if (sample->pid == sample->tid) { + // A new process is forked and the parent pid is ppid. + UpdateProcMap(sample->ppid, sample->tid); + } else { + // A new thread is created and the parent pid is pid(process id). 
+ UpdateProcMap(sample->pid, sample->tid); + } dataTail += header->size; continue; } + if (header->type == PERF_RECORD_COMM) { + struct PerfRecordComm *sample = (struct PerfRecordComm *)header; + DBG_PRINT("Comm exec pid: %d tid: %d\n", sample->pid, sample->tid); + UpdateCommProcMap(sample); + } + if ((off + header->size) > mpage->data_size || header->type != PERF_RECORD_SWITCH_CPU_WIDE) { /* skip the wrap record or invalid record */ dataTail += header->size; @@ -572,14 +617,14 @@ int Spe::SpeReadData(struct SpeContext *context, struct SpeRecord *buf, int size return size - remainSize; } -int Spe::Open(PmuEvt *attr) +int Spe::Open(PmuEvt *attr, int pid) { if (status == NONE) { ctx = (struct SpeContext *)malloc(sizeof(struct SpeContext)); if (!ctx) { return COMMON_ERR_NOMEM; } - auto err = SpeOpen(attr, cpu, ctx); + auto err = SpeOpen(attr, cpu, ctx, pid); if (err != SUCCESS) { return err; } diff --git a/pmu/spe.h b/pmu/spe.h index dd23d22..424aede 100644 --- a/pmu/spe.h +++ b/pmu/spe.h @@ -25,6 +25,7 @@ #include #include #include +#include "process_map.h" #include "pmu_event.h" #include "symbol.h" @@ -158,7 +159,7 @@ public: * @return true * @return false */ - int Open(PmuEvt *attr); + int Open(PmuEvt *attr, int pid); /** * @brief Start collect. 
@@ -213,6 +214,7 @@ private: int SpeReadData(struct SpeContext *context, struct SpeRecord *buf, int size); int CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData *data, int size, int pageSize); void UpdateProcMap(__u32 ppid, __u32 pid); + void UpdateCommProcMap(struct KUNPENG_PMU::PerfRecordComm* recordComm); const unsigned short NONE = 0; const unsigned short OPENED = 1 << 0; diff --git a/pmu/spe_sampler.cpp b/pmu/spe_sampler.cpp index 1e2f139..e0a3295 100644 --- a/pmu/spe_sampler.cpp +++ b/pmu/spe_sampler.cpp @@ -49,7 +49,7 @@ namespace KUNPENG_PMU { } findSpe = speSet.emplace(this->cpu, Spe(this->cpu, procMap, symMode)).first; - auto err = findSpe->second.Open(evt); + auto err = findSpe->second.Open(evt, pid); if (err != SUCCESS) { speSet.erase(this->cpu); return err; diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 426db73..8f3f10e 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -127,6 +127,8 @@ class CtypesPmuAttr(ctypes.Structure): unsigned numCgroup; // length of cgroup list unsigned enableUserAccess : 1; // enable user access counting for current process unsigned enableBpf : 1; // enable bpf mode for counting + unsigned enableHwMetric : 1; // enable hw metric + unsigned enableOnExec: 1; // enable enable_on_exec, after PmuOpen is called, if the load is started, enabling enable_on_exec will automatically enable the performance event after the load starts, without the need to call PmuEnable }; """ @@ -156,6 +158,7 @@ class CtypesPmuAttr(ctypes.Structure): ('enableUserAccess', ctypes.c_uint, 1), ('enableBpf', ctypes.c_uint, 1), ('enableHwMetric', ctypes.c_uint, 1), + ('enableOnExec', ctypes.c_uint, 1), ] def __init__(self, @@ -180,6 +183,7 @@ class CtypesPmuAttr(ctypes.Structure): enableUserAccess=False, enableBpf=False, enableHwMetric=False, + enableOnExec=False, *args, **kw): super(CtypesPmuAttr, self).__init__(*args, **kw) @@ -244,6 +248,7 @@ class 
CtypesPmuAttr(ctypes.Structure): self.enableUserAccess = enableUserAccess self.enableBpf = enableBpf self.enableHwMetric = enableHwMetric + self.enableOnExec = enableOnExec class PmuAttr(object): __slots__ = ['__c_pmu_attr'] @@ -268,7 +273,8 @@ class PmuAttr(object): cgroupNameList=None, enableUserAccess=False, enableBpf=False, - enableHwMetric=False): + enableHwMetric=False, + enableOnExec=False): self.__c_pmu_attr = CtypesPmuAttr( evtList=evtList, @@ -291,6 +297,7 @@ class PmuAttr(object): enableUserAccess=enableUserAccess, enableBpf=enableBpf, enableHwMetric=enableHwMetric, + enableOnExec=enableOnExec, ) @property @@ -316,6 +323,14 @@ class PmuAttr(object): @enableHwMetric.setter def enableHwMetric(self, enableHwMetric): self.c_pmu_attr.enableHwMetric = int(enableHwMetric) + + @property + def enableOnExec(self): + return bool(self.c_pmu_attr.enableOnExec) + + @enableOnExec.setter + def enableOnExec(self, enableOnExec): + self.c_pmu_attr.enableOnExec = int(enableOnExec) @property def c_pmu_attr(self): diff --git a/python/modules/_libkperf/Symbol.py b/python/modules/_libkperf/Symbol.py index 5f874c9..97819f1 100644 --- a/python/modules/_libkperf/Symbol.py +++ b/python/modules/_libkperf/Symbol.py @@ -486,149 +486,6 @@ class StackAsm: stack_asm.__c_stack_asm = c_stack_asm return stack_asm - -class CtypesProcTopology(ctypes.Structure): - """ - struct ProcTopology { - int pid; - int tid; - int ppid; - int numChild; - int* childPid; - char* comm; - char* exe; - bool kernel; - }; - """ - - _fields_ = [ - ('pid', ctypes.c_int), - ('tid', ctypes.c_int), - ('ppid', ctypes.c_int), - ('numChild', ctypes.c_int), - ('childPid', ctypes.POINTER(ctypes.c_int)), - ('comm', ctypes.c_char_p), - ('exe', ctypes.c_char_p), - ('kernel', ctypes.c_bool), - ] - - def __init__(self, - pid= 0, - tid= 0, - ppid= 0, - childPid= None, - comm= '', - exe= '', - kernel= False, - *args, **kw): - super(CtypesProcTopology, self).__init__(*args, **kw) - self.pid = ctypes.c_int(pid) - self.tid = 
ctypes.c_int(tid) - self.ppid = ctypes.c_int(ppid) - if childPid: - numChildPid = len(childPid) - self.childPid = (ctypes.c_int * numChildPid)(*childPid) - self.numChild = ctypes.c_int(numChildPid) - else: - self.childPid = None - self.numChild = ctypes.c_int(0) - self.comm = ctypes.c_char_p(comm.encode(UTF_8)) - self.exe = ctypes.c_char_p(exe.encode(UTF_8)) - self.kernel = ctypes.c_bool(kernel) - - -class ProcTopology: - - __slots__ = ['__c_proc_topology'] - - def __init__(self, - pid= 0, - tid= 0, - ppid= 0, - childPid= None, - comm= '', - exe= '', - kernel= False): - self.__c_proc_topology = CtypesProcTopology( - pid = pid, - tid=tid, - ppid=ppid, - childPid=childPid, - comm=comm, - exe=exe, - kernel=kernel - ) - - @property - def c_proc_topology(self): - return self.__c_proc_topology - - @property - def pid(self): - return self.c_proc_topology.pid - - @pid.setter - def pid(self, pid): - self.c_proc_topology.pid = ctypes.c_int(pid) - - @property - def tid(self): - return self.c_proc_topology.tid - - @tid.setter - def tid(self, tid): - self.c_proc_topology.tid = ctypes.c_int(tid) - - - @property - def ppid(self): - return self.c_proc_topology.ppid - - @ppid.setter - def ppid(self, ppid): - self.c_proc_topology.ppid = ctypes.c_int(ppid) - - @property - def numChild(self): - return self.c_proc_topology.numChild - - @property - def childPid(self): - return [self.c_proc_topology.childPid[i] for i in range(self.numChild)] - - @childPid.setter - def childPid(self, childPid): - if childPid: - numChildPid = len(childPid) - self.c_proc_topology.childPid = (ctypes.c_int * numChildPid)(*childPid) - self.c_proc_topology.numChild = ctypes.c_int(numChildPid) - else: - self.c_proc_topology.childPid = None - self.c_proc_topology.numChild = ctypes.c_int(0) - - @property - def comm(self): - return self.c_proc_topology.comm.decode(UTF_8) - - @comm.setter - def comm(self, comm): - self.c_proc_topology.comm = ctypes.c_char_p(comm.encode(UTF_8)) - - @property - def exe(self): - return 
self.c_proc_topology.exe.decode(UTF_8) - - @exe.setter - def exe(self, exe): - self.c_proc_topology.exe = ctypes.c_char_p(exe.encode(UTF_8)) - - @classmethod - def from_c_proc_topology(cls, c_proc_topology): - proc_topology = cls() - proc_topology.__c_proc_topology = c_proc_topology - return proc_topology - - def SymResolverRecordKernel(): """ int SymResolverRecordKernel(); @@ -735,8 +592,6 @@ __all__ = [ 'AsmCode', 'CtypesStackAsm', 'StackAsm', - 'CtypesProcTopology', - 'ProcTopology', 'SymResolverRecordKernel', 'SymResolverRecordModule', 'SymResolverRecordModuleNoDwarf', diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index fa41a4d..d6a708a 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -131,6 +131,7 @@ class Error: LIBPERF_ERR_INVALID_MIN_LATENCY = 1085 LIBPERF_ERR_INVALID_EVT_FILTER = 1086 LIBPERF_ERR_INVALID_DATA_FILTER = 1087 + LIBPERF_ERR_NOT_SUPPORT_EXEC_ON = 1088 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index bfdcc3f..98033f9 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -346,7 +346,8 @@ class PmuAttr(_libkperf.PmuAttr): branchSampleFilter = 0, cgroupNameList = None, enableUserAccess = False, - enableBpf = False): + enableBpf = False, + enableOnExec = False): super(PmuAttr, self).__init__( evtList=evtList, pidList=pidList, @@ -367,6 +368,7 @@ class PmuAttr(_libkperf.PmuAttr): cgroupNameList=cgroupNameList, enableUserAccess=enableUserAccess, enableBpf=enableBpf, + enableOnExec=enableOnExec, ) diff --git a/symbol/symbol.h b/symbol/symbol.h index bab9951..235df4b 100644 --- a/symbol/symbol.h +++ b/symbol/symbol.h @@ -128,18 +128,6 @@ void FreeModuleData(int pid); void FreeAsmStack(struct StackAsm* stackAsm); int SymGetBuildId(const char *moduleName, char **buildId); - -struct ProcTopology { - int pid; - int tid; - int ppid; - int numChild; - int* childPid; - char* comm; - char* exe; - bool kernel; - bool isMain; 
-}; #ifdef __cplusplus } #endif diff --git a/test/test_perf/test_api.cpp b/test/test_perf/test_api.cpp index 8d5215b..ee574f2 100644 --- a/test/test_perf/test_api.cpp +++ b/test/test_perf/test_api.cpp @@ -797,3 +797,26 @@ TEST_F(TestAPI, InvalidBpfAttr) ASSERT_EQ(pd, -1); #endif } + +TEST_F(TestAPI, TestEnableExeOnFailed) +{ + PmuAttr attr = {0}; + char *evtList[1]; + evtList[0] = (char *)"cycles"; + attr.evtList = evtList; + attr.numEvt = 1; + attr.enableOnExec = 1; + pd = PmuOpen(COUNTING, &attr); + ASSERT_EQ(pd, -1); + ASSERT_EQ(LIBPERF_ERR_NOT_SUPPORT_EXEC_ON, Perrorno()); + pd = PmuOpen(SAMPLING, &attr); + ASSERT_EQ(pd, -1); + ASSERT_EQ(LIBPERF_ERR_NOT_SUPPORT_EXEC_ON, Perrorno()); + auto speAttr = GetSpeAttribute(); + speAttr.pidList = nullptr; + speAttr.numPid = 0; + speAttr.enableOnExec = 1; + pd = PmuOpen(SPE_SAMPLING, &speAttr); + ASSERT_EQ(pd, -1); + ASSERT_EQ(LIBPERF_ERR_NOT_SUPPORT_EXEC_ON, Perrorno()); +} \ No newline at end of file diff --git a/util/process_map.cpp b/util/process_map.cpp index 538617c..e7554ac 100644 --- a/util/process_map.cpp +++ b/util/process_map.cpp @@ -43,6 +43,12 @@ void FreeProcTopo(struct ProcTopology *procTopo) free(procTopo->exe); procTopo->exe = nullptr; } + + if (procTopo->execComm != nullptr) { + free(procTopo->execComm); + procTopo->execComm = nullptr; + } + delete procTopo; } diff --git a/util/process_map.h b/util/process_map.h index f771d4c..b9ff11d 100644 --- a/util/process_map.h +++ b/util/process_map.h @@ -20,6 +20,20 @@ extern "C" { #endif +struct ProcTopology { + int pid; + int tid; + int ppid; + int numChild; + int* childPid; + char* comm; + char* exe; + bool kernel; + bool isMain; + char* execComm; + unsigned long execTs; +}; + struct ProcTopology* GetProcTopology(pid_t pid); void FreeProcTopo(struct ProcTopology *procTopo); int* GetChildTid(int pid, int* numChild); -- Gitee