From 19b591aa5571ab12e2d16b55b0a3099432931830 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 11 Apr 2025 16:47:33 +0800 Subject: [PATCH 01/48] =?UTF-8?q?=E9=80=82=E9=85=8Dx86,=E4=BB=85=E6=94=AF?= =?UTF-8?q?=E6=8C=81count=E5=92=8Csampling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/pcerrc.h | 2 +- pmu/evt.cpp | 7 ++ pmu/pfm/core.cpp | 139 ++++++++++++++++++++++++++++++++- pmu/pfm/pfm.cpp | 21 ++--- pmu/pfm/pfm_name.cpp | 9 +++ pmu/pfm/pfm_name.h | 9 +++ pmu/pfm/trace.cpp | 1 - pmu/pfm/uncore.cpp | 57 ++++++++++++-- pmu/pmu.cpp | 32 ++++++-- pmu/pmu_event.h | 2 +- pmu/pmu_event_list.cpp | 15 ++++ pmu/pmu_metric.cpp | 20 +++++ pmu/pmu_trace_analysis.cpp | 16 ++++ pmu/sample_process.cpp | 11 +++ pmu/sampler.cpp | 6 ++ pmu/spe.h | 6 ++ python/modules/kperf/perror.py | 2 +- util/common.h | 8 ++ util/cpu_map.cpp | 4 + util/cpu_map.h | 1 + util/pcerr.cpp | 1 + 21 files changed, 341 insertions(+), 28 deletions(-) diff --git a/include/pcerrc.h b/include/pcerrc.h index 767a48b..3a71491 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -106,7 +106,7 @@ extern "C" { #define LIBPERF_ERR_CPUFREQ_NOT_CONFIG 1062 #define LIBPERF_ERR_CLUSTER_ID_OVERSIZE 1063 #define LIBPERF_ERR_INVALID_PMU_BDF_TYPE 1064 -#define LIBPERF_ERR_NOT_SUPPORT_METRIC 1065 +#define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 #define UNKNOWN_ERROR 9999 diff --git a/pmu/evt.cpp b/pmu/evt.cpp index db27e0c..4d3b934 100644 --- a/pmu/evt.cpp +++ b/pmu/evt.cpp @@ -113,10 +113,17 @@ __u64 KUNPENG_PMU::ReadOnce(__u64 *head) : "memory"); break; case HEAD_SIZE::HEAD_SIZE_EIGHT: +#ifdef IS_X86 + asm volatile("mov %0, %1" + : "=r"(*(__u64 __attribute__((__may_alias__)) *)pointerUnion.charHead) + : "Q"(*head) + : "memory"); +#else asm volatile("ldar %0, %1" : "=r"(*(__u64 __attribute__((__may_alias__)) *)pointerUnion.charHead) : "Q"(*head) : "memory"); +#endif break; default: break; diff --git a/pmu/pfm/core.cpp b/pmu/pfm/core.cpp 
index 35fd1a7..32cd3a9 100644 --- a/pmu/pfm/core.cpp +++ b/pmu/pfm/core.cpp @@ -255,6 +255,33 @@ namespace HARDWARE_EVENT { KUNPENG_PMU::COMMON::BUS_CYCLES } }; + + PMU_PAIR REF_CYCLES = { + KUNPENG_PMU::COMMON::REF_CYCLES, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_REF_CPU_CYCLES, + KUNPENG_PMU::COMMON::REF_CYCLES + } + }; + + PMU_PAIR BRANCHES = { + KUNPENG_PMU::COMMON::BRANCHES, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + KUNPENG_PMU::COMMON::BRANCHES + } + }; + + PMU_PAIR BRANCH_INSTRUCTIONS = { + KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS + } + }; } // namespace hardware event namespace HW_CACHE_EVENT { @@ -312,6 +339,26 @@ namespace HW_CACHE_EVENT { } }; + + PMU_PAIR LLC_STORE_MISSES = { + KUNPENG_PMU::COMMON::LLC_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10102, + KUNPENG_PMU::COMMON::LLC_STORE_MISSES + } + }; + + PMU_PAIR LLC_STORES = { + KUNPENG_PMU::COMMON::LLC_STORES, + { + PERF_TYPE_HW_CACHE, + 0x102, + KUNPENG_PMU::COMMON::LLC_STORES + } + }; + + PMU_PAIR BRANCH_LOAD_MISSES = { KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES, { @@ -366,6 +413,42 @@ namespace HW_CACHE_EVENT { } }; + PMU_PAIR NODE_LOAD_MISSES = { + KUNPENG_PMU::COMMON::NODE_LOAD_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10006, + KUNPENG_PMU::COMMON::NODE_LOAD_MISSES + } + }; + + PMU_PAIR NODE_LOADS = { + KUNPENG_PMU::COMMON::NODE_LOADS, + { + PERF_TYPE_HW_CACHE, + 0x6, + KUNPENG_PMU::COMMON::NODE_LOADS + } + }; + + PMU_PAIR NODE_STORE_MISSES = { + KUNPENG_PMU::COMMON::NODE_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10106, + KUNPENG_PMU::COMMON::NODE_STORE_MISSES + } + }; + + PMU_PAIR NODE_STORES = { + KUNPENG_PMU::COMMON::NODE_STORES, + { + PERF_TYPE_HW_CACHE, + 0x106, + KUNPENG_PMU::COMMON::NODE_STORES + } + }; + PMU_PAIR L1_DCACHE_STORE_MISSES = { KUNPENG_PMU::COMMON::L1_DCACHE_STORE_MISSES, { @@ -872,12 +955,58 @@ const std::unordered_map HIP_E_CORE_PMU_MA 
SOFTWARE_EVENT::TASK_CLOCK, }; +const std::unordered_map HIP_X86_CORE_PMU_MAP{ + HARDWARE_EVENT::BRANCH_MISSES, + HARDWARE_EVENT::CACHE_MISSES, + HARDWARE_EVENT::CACHE_REFERENCES, + HARDWARE_EVENT::CPU_CYCLES, + HARDWARE_EVENT::CYCLES, + HARDWARE_EVENT::INSTRUCTIONS, + HARDWARE_EVENT::BUS_CYCLES, + HARDWARE_EVENT::REF_CYCLES, + HARDWARE_EVENT::BRANCH_INSTRUCTIONS, + HARDWARE_EVENT::BRANCHES, + + SOFTWARE_EVENT::ALIGNMENT_FAULTS, + SOFTWARE_EVENT::BPF_OUTPUT, + SOFTWARE_EVENT::CONTEXT_SWITCHES, + SOFTWARE_EVENT::CS, + SOFTWARE_EVENT::CPU_CLOCK, + SOFTWARE_EVENT::CPU_MIGRATIONS, + SOFTWARE_EVENT::MIGRATIONS, + SOFTWARE_EVENT::DUMMY, + SOFTWARE_EVENT::EMULATION_FAULTS, + SOFTWARE_EVENT::MAJOR_FAULTS, + SOFTWARE_EVENT::MINOR_FAULTS, + SOFTWARE_EVENT::PAGE_FAULTS, + SOFTWARE_EVENT::FAULTS, + SOFTWARE_EVENT::TASK_CLOCK, + + HW_CACHE_EVENT::L1_DCACHE_LOAD_MISSES, + HW_CACHE_EVENT::L1_DCACHE_LOADS, + HW_CACHE_EVENT::L1_ICACHE_LOAD_MISSES, + HW_CACHE_EVENT::L1_ICACHE_LOADS, + HW_CACHE_EVENT::LLC_LOAD_MISSES, + HW_CACHE_EVENT::LLC_LOADS, + HW_CACHE_EVENT::LLC_STORE_MISSES, + HW_CACHE_EVENT::LLC_STORES, + HW_CACHE_EVENT::BRANCH_LOAD_MISSES, + HW_CACHE_EVENT::BRANCH_LOADS, + HW_CACHE_EVENT::DTLB_LOAD_MISSES, + HW_CACHE_EVENT::DTLB_LOADS, + HW_CACHE_EVENT::NODE_LOAD_MISSES, + HW_CACHE_EVENT::NODE_LOADS, + HW_CACHE_EVENT::NODE_STORE_MISSES, + HW_CACHE_EVENT::NODE_STORES, +}; + const KUNPENG_PMU::CORE_EVT_MAP KUNPENG_PMU::CORE_EVENT_MAP = { {CHIP_TYPE::HIPA, HIP_A_CORE_PMU_MAP}, {CHIP_TYPE::HIPB, HIP_B_CORE_PMU_MAP}, {CHIP_TYPE::HIPC, HIP_C_CORE_PMU_MAP}, {CHIP_TYPE::HIPF, HIP_F_CORE_PMU_MAP}, {CHIP_TYPE::HIPE, HIP_E_CORE_PMU_MAP}, + {CHIP_TYPE::HIPX86, HIP_X86_CORE_PMU_MAP}, }; static struct PmuEvt* ConstructPmuEvtFromCore(KUNPENG_PMU::CoreConfig config, int collectType) @@ -888,7 +1017,6 @@ static struct PmuEvt* ConstructPmuEvtFromCore(KUNPENG_PMU::CoreConfig config, in pmuEvtPtr->type = config.type; pmuEvtPtr->pmuType = KUNPENG_PMU::CORE_TYPE; pmuEvtPtr->collectType = 
collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -951,7 +1079,6 @@ static struct PmuEvt* ConstructPmuEvtFromKernel(const char* pmuName, int collect pmuEvtPtr->type = type; pmuEvtPtr->pmuType = KUNPENG_PMU::CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -981,6 +1108,13 @@ std::string GetPmuDevicePath() } struct dirent *dent; while (dent = readdir(dir)) { +#ifdef IS_X86 + // look for devices like /sys/bus/event_source/devices/cpu/events + if (strcmp(dent->d_name, "cpu") == 0) { + pmuDevice = DEVICE_PATH + dent->d_name; + break; + } +#else if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..") || !strcmp(dent->d_name, "cpu")) { continue; } @@ -992,6 +1126,7 @@ std::string GetPmuDevicePath() pmuDevice = DEVICE_PATH + dent->d_name; break; } +#endif } closedir(dir); return pmuDevice; diff --git a/pmu/pfm/pfm.cpp b/pmu/pfm/pfm.cpp index 7d4aa7f..e516dd5 100644 --- a/pmu/pfm/pfm.cpp +++ b/pmu/pfm/pfm.cpp @@ -54,7 +54,6 @@ static struct PmuEvt* GetRawEvent(const char* pmuName, int collectType) pmuEvtPtr->type = PERF_TYPE_RAW; pmuEvtPtr->pmuType = CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -120,26 +119,31 @@ static bool CheckRawEvent(const char *pmuName) static int GetEventType(const char *pmuName) { - if (pmuName[0] == 'r' && CheckRawEvent(pmuName)) { - return RAW_TYPE; - } if (CheckEventInList(CORE_EVENT, pmuName)) { return CORE_TYPE; } - std::string strName(pmuName); - // Kernel trace point event name like 'block:block_bio_complete' - if (CheckEventInList(TRACE_EVENT, pmuName)) { - return TRACE_TYPE; + + if (pmuName[0] == 'r' && CheckRawEvent(pmuName)) { + return RAW_TYPE; } + std::string strName(pmuName); // Parse uncore event name like 'hisi_sccl3_ddrc0/flux_rd/' if (CheckEventInList(UNCORE_EVENT, pmuName)) { return UNCORE_TYPE; } +#ifdef IS_X86 + return -1; +#else + // Kernel trace point event name like 'block:block_bio_complete' + if 
(CheckEventInList(TRACE_EVENT, pmuName)) { + return TRACE_TYPE; + } // Parse uncore event raw name like 'hisi_sccl3_ddrc0/config=0x0/' // or smmuv3_pmcg_100020/transaction,filter_enable=1,filter_stream_id=0x7d/ if (CheckUncoreRawEvent(pmuName)) { return UNCORE_RAW_TYPE; } +#endif return -1; } @@ -175,7 +179,6 @@ struct PmuEvt* PfmGetSpeEvent( evt->config = dataFilter; evt->config1 = eventFilter; evt->config2 = minLatency; - evt->cpumask = -1; return evt; } diff --git a/pmu/pfm/pfm_name.cpp b/pmu/pfm/pfm_name.cpp index 0ef9a90..7baa0f6 100644 --- a/pmu/pfm/pfm_name.cpp +++ b/pmu/pfm/pfm_name.cpp @@ -24,6 +24,9 @@ const char* KUNPENG_PMU::COMMON::BRANCH_MISSES = "branch-misses"; const char* KUNPENG_PMU::COMMON::BUS_CYCLES = "bus-cycles"; const char* KUNPENG_PMU::COMMON::CACHE_MISSES = "cache-misses"; +const char* KUNPENG_PMU::COMMON::REF_CYCLES = "ref-cycles"; +const char* KUNPENG_PMU::COMMON::BRANCHES = "branches"; +const char* KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS = "branch-instructions"; const char* KUNPENG_PMU::COMMON::CACHE_REFERENCES = "cache-references"; const char* KUNPENG_PMU::COMMON::CPU_CYCLES = "cpu-cycles"; const char* KUNPENG_PMU::COMMON::CYCLES = "cycles"; @@ -40,12 +43,18 @@ const char* KUNPENG_PMU::COMMON::IDLE_CYCLES_FRONTEND = "idle-cycles-frontend"; const char* KUNPENG_PMU::COMMON::L1_ICACHE_LOADS = "L1-icache-loads"; const char* KUNPENG_PMU::COMMON::LLC_LOAD_MISSES = "LLC-load-misses"; const char* KUNPENG_PMU::COMMON::LLC_LOADS = "LLC-loads"; +const char* KUNPENG_PMU::COMMON::LLC_STORE_MISSES = "LLC-store-misses"; +const char* KUNPENG_PMU::COMMON::LLC_STORES = "LLC-stores"; const char* KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES = "branch-load-misses"; const char* KUNPENG_PMU::COMMON::BRANCH_LOADS = "branch-loads"; const char* KUNPENG_PMU::COMMON::DTLB_LOAD_MISSES = "dTLB-load-misses"; const char* KUNPENG_PMU::COMMON::DTLB_LOADS = "dTLB-loads"; const char* KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES = "iTLB-load-misses"; const char* 
KUNPENG_PMU::COMMON::ITLB_LOADS = "iTLB-loads"; +const char* KUNPENG_PMU::COMMON::NODE_LOAD_MISSES = "node-load-misses"; +const char* KUNPENG_PMU::COMMON::NODE_LOADS = "node-loads"; +const char* KUNPENG_PMU::COMMON::NODE_STORE_MISSES = "node-store-misses"; +const char* KUNPENG_PMU::COMMON::NODE_STORES = "node-stores"; const char* KUNPENG_PMU::COMMON::ALIGNMENT_FAULTS = "alignment-faults"; const char* KUNPENG_PMU::COMMON::BPF_OUTPUT = "bpf-output"; const char* KUNPENG_PMU::COMMON::CONTEXT_SWITCHES = "context-switches"; diff --git a/pmu/pfm/pfm_name.h b/pmu/pfm/pfm_name.h index ec528d5..c5e5905 100644 --- a/pmu/pfm/pfm_name.h +++ b/pmu/pfm/pfm_name.h @@ -25,6 +25,9 @@ extern const char* BRANCH_MISSES; extern const char* BUS_CYCLES; extern const char* CACHE_MISSES; extern const char* CACHE_REFERENCES; +extern const char* REF_CYCLES; +extern const char* BRANCHES; +extern const char* BRANCH_INSTRUCTIONS; extern const char* CPU_CYCLES; extern const char* CYCLES; extern const char* INSTRUCTIONS; @@ -40,12 +43,18 @@ extern const char* IDLE_CYCLES_FRONTEND; extern const char* L1_ICACHE_LOADS; extern const char* LLC_LOAD_MISSES; extern const char* LLC_LOADS; +extern const char* LLC_STORE_MISSES; +extern const char* LLC_STORES; extern const char* BRANCH_LOAD_MISSES; extern const char* BRANCH_LOADS; extern const char* DTLB_LOAD_MISSES; extern const char* DTLB_LOADS; extern const char* ITLB_LOAD_MISSES; extern const char* ITLB_LOADS; +extern const char* NODE_LOAD_MISSES; +extern const char* NODE_LOADS; +extern const char* NODE_STORE_MISSES; +extern const char* NODE_STORES; // Software event extern const char* ALIGNMENT_FAULTS; extern const char* BPF_OUTPUT; diff --git a/pmu/pfm/trace.cpp b/pmu/pfm/trace.cpp index a435635..54c5bec 100644 --- a/pmu/pfm/trace.cpp +++ b/pmu/pfm/trace.cpp @@ -58,6 +58,5 @@ struct PmuEvt* GetKernelTraceEvent(const char* pmuName, int collectType) pmuEvtPtr->type = PERF_TYPE_TRACEPOINT; pmuEvtPtr->pmuType = TRACE_TYPE; pmuEvtPtr->collectType = 
collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index a3e655b..e6fca1a 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -44,23 +44,49 @@ static int GetDeviceType(const string &devName) return stoi(typeStr); } -static int GetCpuMask(const string &devName) +static std::vector GetCpuMask(const string &devName) { + std::vector maskList; string maskPath = "/sys/devices/" + devName + "/cpumask"; std::string realPath = GetRealPath(maskPath); if (!IsValidPath(realPath)) { - return -1; + return maskList; } ifstream maskIn(realPath); if (!maskIn.is_open()) { - return -1; + return maskList; } // Cpumask is a comma-separated list of integers, // but now make it simple for ddrc event. - string maskStr; + char maskStr[1024]; maskIn >> maskStr; - return stoi(maskStr); + char *tokStr = strtok(maskStr, ","); + while (tokStr != nullptr) { + if (strstr(tokStr, "-") != nullptr) { + int minCpu, maxCpu; + if (sscanf(tokStr, "%d-%d", &minCpu, &maxCpu) != 2) { + continue; + } + for (int i = minCpu; i <= maxCpu; i++) { + maskList.push_back(i); + } + } else { + int aloneNumber; + if (sscanf(tokStr, "%d", &aloneNumber) == 1) { + maskList.push_back(aloneNumber); + } + } + tokStr = strtok(nullptr, ","); + } + return maskList; +} + +static int64_t TransferStrToHex(const std::string& str) { + int64_t intData; + std::istringstream iss(str); + iss >> std::hex >> intData; + return intData; } static int64_t GetUncoreEventConfig(const char* pmuName) @@ -85,10 +111,25 @@ static int64_t GetUncoreEventConfig(const char* pmuName) if (findEq == string::npos) { return -1; } + +#ifdef IS_X86 + auto umaskEq = configStr.find("umask"); + if (umaskEq != string::npos) { + auto CommaEq = configStr.find(","); + if (CommaEq == string::npos) { + return -1; + } + auto lowStr = configStr.substr(findEq + 1, CommaEq - findEq); + int64_t low = TransferStrToHex(lowStr); + auto highStr = configStr.substr(umaskEq + 6, configStr.size() - umaskEq - 
6); + int64_t high = TransferStrToHex(highStr); + config = (high << 8) + low; + return config; + } +#endif auto subStr = configStr.substr(findEq + 1, configStr.size() - findEq); std::istringstream iss(subStr); iss >> std::hex >> config; - return config; } @@ -103,8 +144,8 @@ int FillUncoreFields(const char* pmuName, PmuEvt *evt) return UNKNOWN_ERROR; } evt->type = devType; - int cpuMask = GetCpuMask(devName); - evt->cpumask = cpuMask; + std::vector cpuMaskList = GetCpuMask(devName); + evt->cpuMaskList = cpuMaskList; evt->name = pmuName; return SUCCESS; } diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 44414ca..c033dfc 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -174,6 +174,12 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * New(LIBPERF_ERR_INVALID_TASK_TYPE); return LIBPERF_ERR_INVALID_TASK_TYPE; } +#ifdef IS_X86 + if (collectType != COUNTING && collectType != SAMPLING) { + New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUTING mode and SMAPLING mode"); + return LIBPERF_ERR_INVALID_TASK_TYPE; + } +#endif if ((collectType == COUNTING) && attr->evtList == nullptr) { New(LIBPERF_ERR_INVALID_EVTLIST, "Counting mode requires a non-null event list."); return LIBPERF_ERR_INVALID_EVTLIST; @@ -205,7 +211,6 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * New(LIBPERF_ERR_INVALID_GROUP_SPE); return LIBPERF_ERR_INVALID_GROUP_SPE; } - return SUCCESS; } @@ -789,10 +794,12 @@ static struct PmuEvt* GetPmuEvent(const char* pmuName, int collectType) static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt) { - if (pmuEvt->cpumask >= 0) { - taskParam->numCpu = 1; - taskParam->cpuList = new int[1]; - taskParam->cpuList[0] = pmuEvt->cpumask; + if (!pmuEvt->cpuMaskList.empty()) { + taskParam->numCpu = pmuEvt->cpuMaskList.size(); + taskParam->cpuList = new int[pmuEvt->cpuMaskList.size()]; + for(int i = 0; i < pmuEvt->cpuMaskList.size(); i++) { + taskParam->cpuList[i] = 
pmuEvt->cpuMaskList[i]; + } } else if (attr->cpuList == nullptr && attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { // For counting with pid list for system wide, open fd with cpu -1 and specific pid. taskParam->numCpu = 1; @@ -839,7 +846,11 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } else { pmuEvt = GetPmuEvent(evtName, collectType); if (pmuEvt == nullptr) { +#ifdef IS_X86 + New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName) + ";x86 just supports core event and raw event"); +#else New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName)); +#endif return nullptr; } } @@ -953,22 +964,33 @@ int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpD } int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *value, uint32_t vSize) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86; +#else if (rawData == nullptr) { New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return LIBPERF_ERR_INVALID_FIELD_ARGS; } return PointerPasser::ParsePointer(rawData->data, fieldName, value, vSize); +#endif } struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return nullptr; +#else if (rawData == nullptr) { New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return nullptr; } + SampleRawField *rt = PointerPasser::GetSampleRawField(rawData->data, fieldName); if (rt) { New(SUCCESS); } return rt; +#endif } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 8455377..6fc6a72 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -38,7 +38,7 @@ struct PmuEvt { int pmuType; // if pmu is CORE/UNCORE/SPE and etc (to be implemented) int collectType; std::string name; // string name of this pmu event - int cpumask; // a representative CPU number for each socket (package) in the 
motherboard. + std::vector cpuMaskList; // representative CPU number list for each socket (package) in the motherboard. unsigned excludeUser : 1; // don't count user unsigned excludeKernel : 1; // don't count kernel unsigned callStack : 1; // collect complete call stack diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index c85bc52..3a3bc0d 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -37,7 +37,11 @@ static const string EVENT_DIR = "/events/"; static std::mutex pmuEventListMtx; +#ifdef IS_X86 +static vector supportDevPrefixs = {"uncore_iio", "uncore_imc"}; +#else static vector supportDevPrefixs = {"hisi", "smmuv3", "hns3", "armv8"}; +#endif static vector uncoreEventList; static vector traceEventList; @@ -57,6 +61,12 @@ static void GetEventName(const string& devName, vector& eventList) continue; } string fileName(entry->d_name); +#ifdef IS_X86 + // Included in x86 .scale .unit files not for events + if (fileName.find('.') != string::npos) { + continue; + } +#endif auto eventName = devName; eventName += SLASH + fileName; eventName += SLASH; @@ -182,6 +192,10 @@ const char** QueryUncoreEvent(unsigned *numEvt) const char** QueryTraceEvent(unsigned *numEvt) { +#ifdef IS_X86 + *numEvt = 0; + return nullptr; +#else if (!traceEventList.empty()) { *numEvt = traceEventList.size(); return traceEventList.data(); @@ -207,6 +221,7 @@ const char** QueryTraceEvent(unsigned *numEvt) closedir(dir); *numEvt = traceEventList.size(); return traceEventList.data(); +#endif } const char** QueryAllEvent(unsigned *numEvt) { diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index a2163a1..eedf8dd 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1271,6 +1271,10 @@ using namespace KUNPENG_PMU; const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return nullptr; +#else try { lock_guard lg(pmuBdfListMtx); SetWarn(SUCCESS); @@ -1302,6 +1306,7 @@ const char** 
PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf) New(UNKNOWN_ERROR, ex.what()); return nullptr; } +#endif } static void PmuBdfListFreeSingle(vector &bdfList) @@ -1324,6 +1329,10 @@ void PmuDeviceBdfListFree() int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); try { if (CheckPmuDeviceAttr(attr, len) != SUCCESS) { @@ -1357,6 +1366,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } static int CheckPmuDeviceVar(struct PmuData *pmuData, unsigned len, @@ -1384,6 +1394,10 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, struct PmuDeviceAttr *attr, unsigned attrLen, struct PmuDeviceData **data) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); try { if (CheckPmuDeviceVar(pmuData, len, attr, attrLen) != SUCCESS) { @@ -1436,6 +1450,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } void DevDataFree(struct PmuDeviceData *data) @@ -1482,6 +1497,10 @@ static void InitializeCoreArray() int PmuGetClusterCore(unsigned clusterId, unsigned **coreList) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else try { lock_guard lg(pmuCoreListMtx); InitializeCoreArray(); @@ -1513,6 +1532,7 @@ int PmuGetClusterCore(unsigned clusterId, unsigned **coreList) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } int PmuGetNumaCore(unsigned nodeId, unsigned **coreList) diff --git a/pmu/pmu_trace_analysis.cpp b/pmu/pmu_trace_analysis.cpp index 7dbf4da..c1c6834 100644 --- a/pmu/pmu_trace_analysis.cpp +++ b/pmu/pmu_trace_analysis.cpp @@ -29,6 +29,11 @@ static vector SysCallFuncList; const char** PmuSysCallFuncList(unsigned *numFuncs) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + *numFuncs = 0; + return nullptr; +#else lock_guard 
lg(SysCallListMtx); SetWarn(SUCCESS); try { @@ -57,6 +62,7 @@ const char** PmuSysCallFuncList(unsigned *numFuncs) New(SUCCESS); *numFuncs = SysCallFuncList.size(); return SysCallFuncList.data(); +#endif } void PmuSysCallFuncListFree() @@ -172,6 +178,10 @@ static char **GeneratePmuAttrEvtList(const char **sysCallFuncs, const unsigned n int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); auto err = CheckTraceAttr(traceType, traceAttr); if (err != SUCCESS) { @@ -199,6 +209,7 @@ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr) } return pd; +#endif } int PmuTraceEnable(int pd) @@ -213,6 +224,10 @@ int PmuTraceDisable(int pd) int PmuTraceRead(int pd, struct PmuTraceData **pmuTraceData) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else PmuData *pmuData = nullptr; unsigned len = PmuRead(pd, &pmuData); if (len == -1) { @@ -246,6 +261,7 @@ int PmuTraceRead(int pd, struct PmuTraceData **pmuTraceData) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } void PmuTraceClose(int pd) diff --git a/pmu/sample_process.cpp b/pmu/sample_process.cpp index fcd1bfa..778b622 100644 --- a/pmu/sample_process.cpp +++ b/pmu/sample_process.cpp @@ -22,6 +22,16 @@ #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) #define MB() asm volatile("dmb ish" ::: "memory") static constexpr int MAX_DATA_SIZE = 8192; +#ifdef IS_X86 +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#else #define PerfRingbufferSmpStoreRelease(p, v) \ ({ \ union { \ @@ -30,6 +40,7 @@ static constexpr int MAX_DATA_SIZE = 8192; } pointerUnion = {.val = (v)}; \ asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ }) +#endif 
void KUNPENG_PMU::PerfMmapConsume(PerfMmap &map) { diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 57682fb..6cfd684 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -55,7 +55,13 @@ int KUNPENG_PMU::PerfSampler::MapPerfAttr(const bool groupEnable, const int grou attr.read_format = PERF_FORMAT_ID; attr.exclude_kernel = this->evt->excludeKernel; attr.exclude_user = this->evt->excludeUser; +#ifdef IS_X86 + if (this->pid == -1) { + attr.pinned = 0; + } +#else attr.pinned = 1; +#endif attr.disabled = 1; attr.inherit = 1; attr.mmap = 1; diff --git a/pmu/spe.h b/pmu/spe.h index aa4ba0e..f1fe64c 100644 --- a/pmu/spe.h +++ b/pmu/spe.h @@ -28,9 +28,15 @@ #include "pmu_event.h" #include "symbol.h" +#ifdef IS_X86 +#define MB() asm volatile("mfence":::"memory") +#define RMB() asm volatile("lfence":::"memory") +#define WMB() asm volatile("sfence":::"memory") +#else #define MB() asm volatile("dsb sy") #define RMB() asm volatile("dsb ld") #define WMB() asm volatile("dsb st") +#endif #define EVENT_EXCEPTION_GEN 0x1 #define EVENT_RETIRED 0x2 diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index bc8c8e2..f8dcb9c 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -105,7 +105,7 @@ class Error: LIBPERF_ERR_CPUFREQ_NOT_CONFIG = 1062 LIBPERF_ERR_CLUSTER_ID_OVERSIZE = 1063 LIBPERF_ERR_INVALID_PMU_BDF_TYPE = 1064 - LIBPERF_ERR_NOT_SUPPORT_METRIC = 1065 + LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 UNKNOWN_ERROR = 9999 diff --git a/util/common.h b/util/common.h index 4ffd0c8..f634e4e 100644 --- a/util/common.h +++ b/util/common.h @@ -19,6 +19,14 @@ #include #include +#ifdef __x86_64__ +#define IS_X86 1 +#elif defined(__aarch64__) +#define IS_ARM 1 +#else +#error "Only the x86_64 and aarch64 architecture are supported." 
+#endif + const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/"; diff --git a/util/cpu_map.cpp b/util/cpu_map.cpp index c8e0c9e..4c1577d 100644 --- a/util/cpu_map.cpp +++ b/util/cpu_map.cpp @@ -99,10 +99,14 @@ bool InitCpuType() CHIP_TYPE GetCpuType() { +#ifdef IS_X86 + return HIPX86; +#else if (g_chipType == UNDEFINED_TYPE && !InitCpuType()) { return UNDEFINED_TYPE; } return g_chipType; +#endif } set GetOnLineCpuIds() diff --git a/util/cpu_map.h b/util/cpu_map.h index 6279954..df16320 100644 --- a/util/cpu_map.h +++ b/util/cpu_map.h @@ -27,6 +27,7 @@ enum CHIP_TYPE { HIPC = 3, HIPF = 4, HIPE = 5, + HIPX86 = 6, }; struct CpuTopology* GetCpuTopology(int coreId); diff --git a/util/pcerr.cpp b/util/pcerr.cpp index ab1c083..7d70a3e 100644 --- a/util/pcerr.cpp +++ b/util/pcerr.cpp @@ -53,6 +53,7 @@ namespace pcerr { {LIBPERF_ERR_BRANCH_JUST_SUPPORT_SAMPLING, "branch filter just support sampling mode"}, {LIBPERF_ERR_RESET_FD, "failed to reset fd output"}, {LIBPERF_ERR_SET_FD_RDONLY_NONBLOCK, "failed to set fd readonly and nonbolock"}, + {LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86, "the current interface does not support x86"}, }; static std::unordered_map warnMsgs = { {LIBPERF_WARN_CTXID_LOST, "Some SPE context packets are not found in the traces."}, -- Gitee From 9f86ea51fcfd18543b7aff98d9073bef2e9212d0 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Sat, 19 Apr 2025 10:41:48 +0800 Subject: [PATCH 02/48] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=81=97=E6=BC=8F?= =?UTF-8?q?=E4=BA=8B=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/perf_counter.cpp | 7 ++++++- pmu/pfm/core.cpp | 22 ++++++++++++++++++++++ pmu/pfm/pfm_name.cpp | 2 ++ pmu/pfm/pfm_name.h | 2 ++ pmu/pmu_event_list.cpp | 2 +- util/common.cpp | 7 +++++++ util/common.h | 1 + 7 files changed, 41 insertions(+), 2 deletions(-) diff --git a/pmu/perf_counter.cpp 
b/pmu/perf_counter.cpp index a6417f7..bf801b0 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -126,8 +126,13 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou attr.disabled = 0; this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } else { - if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE) { +#ifdef IS_X86 + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) { this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#else + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#endif } else { this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } diff --git a/pmu/pfm/core.cpp b/pmu/pfm/core.cpp index 32cd3a9..c177fb3 100644 --- a/pmu/pfm/core.cpp +++ b/pmu/pfm/core.cpp @@ -395,6 +395,24 @@ namespace HW_CACHE_EVENT { } }; + PMU_PAIR DTLB_STORE_MISSES = { + KUNPENG_PMU::COMMON::DTLB_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10103, + KUNPENG_PMU::COMMON::DTLB_STORE_MISSES + } + }; + + PMU_PAIR DTLB_STORES = { + KUNPENG_PMU::COMMON::DTLB_STORES, + { + PERF_TYPE_HW_CACHE, + 0x103, + KUNPENG_PMU::COMMON::DTLB_STORES + } + }; + PMU_PAIR ITLB_LOAD_MISSES = { KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES, { @@ -994,6 +1012,10 @@ const std::unordered_map HIP_X86_CORE_PMU_ HW_CACHE_EVENT::BRANCH_LOADS, HW_CACHE_EVENT::DTLB_LOAD_MISSES, HW_CACHE_EVENT::DTLB_LOADS, + HW_CACHE_EVENT::DTLB_STORE_MISSES, + HW_CACHE_EVENT::DTLB_STORES, + HW_CACHE_EVENT::ITLB_LOADS, + HW_CACHE_EVENT::ITLB_LOAD_MISSES, HW_CACHE_EVENT::NODE_LOAD_MISSES, HW_CACHE_EVENT::NODE_LOADS, HW_CACHE_EVENT::NODE_STORE_MISSES, diff --git a/pmu/pfm/pfm_name.cpp b/pmu/pfm/pfm_name.cpp index 7baa0f6..8686ecd 100644 --- a/pmu/pfm/pfm_name.cpp +++ b/pmu/pfm/pfm_name.cpp @@ -49,6 +49,8 @@ const char* KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES = "branch-load-misses"; const char* 
KUNPENG_PMU::COMMON::BRANCH_LOADS = "branch-loads"; const char* KUNPENG_PMU::COMMON::DTLB_LOAD_MISSES = "dTLB-load-misses"; const char* KUNPENG_PMU::COMMON::DTLB_LOADS = "dTLB-loads"; +const char* KUNPENG_PMU::COMMON::DTLB_STORE_MISSES = "dTLB-store-misses"; +const char* KUNPENG_PMU::COMMON::DTLB_STORES = "dTLB-stores"; const char* KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES = "iTLB-load-misses"; const char* KUNPENG_PMU::COMMON::ITLB_LOADS = "iTLB-loads"; const char* KUNPENG_PMU::COMMON::NODE_LOAD_MISSES = "node-load-misses"; diff --git a/pmu/pfm/pfm_name.h b/pmu/pfm/pfm_name.h index c5e5905..1563590 100644 --- a/pmu/pfm/pfm_name.h +++ b/pmu/pfm/pfm_name.h @@ -49,6 +49,8 @@ extern const char* BRANCH_LOAD_MISSES; extern const char* BRANCH_LOADS; extern const char* DTLB_LOAD_MISSES; extern const char* DTLB_LOADS; +extern const char* DTLB_STORE_MISSES; +extern const char* DTLB_STORES; extern const char* ITLB_LOAD_MISSES; extern const char* ITLB_LOADS; extern const char* NODE_LOAD_MISSES; diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index 3a3bc0d..ab83ffd 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -38,7 +38,7 @@ static const string EVENT_DIR = "/events/"; static std::mutex pmuEventListMtx; #ifdef IS_X86 -static vector supportDevPrefixs = {"uncore_iio", "uncore_imc"}; +static vector supportDevPrefixs = {"uncore_iio", "uncore_imc", "cpu"}; #else static vector supportDevPrefixs = {"hisi", "smmuv3", "hns3", "armv8"}; #endif diff --git a/util/common.cpp b/util/common.cpp index 647119a..67c4a66 100644 --- a/util/common.cpp +++ b/util/common.cpp @@ -155,4 +155,11 @@ std::string GetTraceEventDir() return TRACE_DEBUG_EVENT_PATH; } return ""; +} + +bool StartWith(const std::string& str, const std::string& prefix) { + if (str.size() < prefix.size()) { + return false; + } + return str.substr(0, prefix.size()) == prefix; } \ No newline at end of file diff --git a/util/common.h b/util/common.h index f634e4e..1c3bcb7 100644 --- a/util/common.h +++ 
b/util/common.h @@ -41,5 +41,6 @@ std::vector SplitStringByDelimiter(const std::string& str, char del int RaiseNumFd(uint64_t numFd); bool ExistPath(const std::string& filePath); std::string GetTraceEventDir(); +bool StartWith(const std::string& str, const std::string& prefix); #endif // LIBKPROF_COMMON_H -- Gitee From e532bd879762ce2f5703018c285e333ecd024ff8 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 18 Apr 2025 15:32:32 +0800 Subject: [PATCH 03/48] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbdf=E4=B8=BA=E7=A9=BA?= =?UTF-8?q?=E6=97=B6bdf=E6=A0=A1=E9=AA=8C=E5=87=BA=E9=94=99=E9=97=AE?= =?UTF-8?q?=E9=A2=98,=E5=A2=9E=E5=8A=A0=E8=B5=84=E6=96=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Details_Usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 289c03c..815f4d8 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -1078,4 +1078,4 @@ pmu_hotspot 5 1 1 1、只支持SAMPLING模式采集 -2、只支持对进程分析,不支持对系统分析 +2、只支持对进程分析,不支持对系统分析 \ No newline at end of file -- Gitee From 03ba56e894ee62b2c52948de60b1ba9183646e49 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 25 Apr 2025 10:07:24 +0800 Subject: [PATCH 04/48] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dpython=20ksym.get=5Fsta?= =?UTF-8?q?ck=E5=92=8Cget=5Fsymbol=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/modules/_libkperf/Symbol.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/modules/_libkperf/Symbol.py b/python/modules/_libkperf/Symbol.py index 7b3242e..705f1a3 100644 --- a/python/modules/_libkperf/Symbol.py +++ b/python/modules/_libkperf/Symbol.py @@ -645,7 +645,7 @@ def SymResolverRecordModuleNoDwarf(pid: int) -> None: c_SymResolverRecordModuleNoDwarf(c_pid) -def StackToHash(pid: int, stackList: List[int]) -> Iterator[Stack]: +def StackToHash(pid: int, 
stackList: List[int]) -> Stack: """ struct Stack* StackToHash(int pid, unsigned long* stack, int nr); """ @@ -659,10 +659,9 @@ def StackToHash(pid: int, stackList: List[int]) -> Iterator[Stack]: c_nr = ctypes.c_int(stack_len) c_stack = c_StackToHash(c_pid, c_stack_list, c_nr) - while c_stack: - stack = Stack.from_c_stack(c_stack) - yield stack - c_stack = c_stack.contents.next + if not c_stack: + return None + return Stack.from_c_stack(c_stack.contents) def SymResolverMapAddr(pid: int, addr: int) -> Symbol: @@ -677,8 +676,9 @@ def SymResolverMapAddr(pid: int, addr: int) -> Symbol: c_addr = ctypes.c_ulong(addr) c_sym = c_SymResolverMapAddr(c_pid, c_addr) - - return Symbol.from_c_sym(c_sym) + if not c_sym: + return None + return Symbol.from_c_sym(c_sym.contents) def FreeModuleData(pid: int) -> None: -- Gitee From 0f42d83e4bf98ca8e99217544c9ed9b6d8b77bd2 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Sun, 27 Apr 2025 14:11:32 +0800 Subject: [PATCH 05/48] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=B5=84=E6=96=99?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 10 ++++------ README.md | 10 +++++----- docs/Details_Usage.md | 2 +- docs/Go_API.md | 10 +++++----- go/src/libkperf_test/libkperf_test.go | 2 +- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/README.en.md b/README.en.md index f6a888e..e1f8ee9 100644 --- a/README.en.md +++ b/README.en.md @@ -123,7 +123,7 @@ All pmu functions are accomplished by the following interfaces: Here are some examples: -* Get pmu count for a process. 
+* Get pmu count for a process ```C++ int pidList[1]; @@ -204,8 +204,7 @@ PmuDataFree(data); PmuClose(pd); ``` -Python examples: - +* Python examples ```python import time from collections import defaultdict @@ -236,8 +235,7 @@ def Counting(): kperf.close(pd) ``` -Go example - +* Go example ```go import "libkperf/kperf" import "fmt" @@ -289,7 +287,7 @@ python example.py ``` * **For Go example Code:** - You can directly go to the go/src/libkperf/libkperf_test directory. + You can directly go to the go/src/libkperf_test directory. ```shell go test -v # run all diff --git a/README.md b/README.md index 0d5c764..007a586 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,7 @@ Go API文档可以参考GO_API.md: 以下是一些示例: -- 获取进程的pmu计数。 - +- 获取进程的pmu计数 ```C++ int pidList[1]; pidList[0] = pid; @@ -181,7 +180,7 @@ PmuDataFree(data); PmuClose(pd); ``` -Python 例子: +- Python 例子 ```python import time from collections import defaultdict @@ -213,7 +212,8 @@ def Counting(): kperf.close(pd) ``` -Go 例子 + +- Go 例子 ```go import "libkperf/kperf" import "fmt" @@ -264,7 +264,7 @@ python example.py ``` * **针对Go示例代码:** -可以直接跳转到 go/src/libkperf/libkperf_test目录下 +可以直接跳转到 go/src/libkperf_test目录下 ```shell go test -v # 全部运行 go test -v -test.run TestCount #指定运行的用例 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 815f4d8..5c9f6e8 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -297,7 +297,7 @@ c --> d(......) 
Symbol的字段信息受PmuAttr影响: - PmuAttr.callStack会决定Stack是完整的调用栈,还是只有一层调用栈(即Stack链表只有一个元素)。 - PmuAttr.symbolMode如果等于NO_SYMBOL_RESOLVE,那么PmuData的stack是空指针。 -- PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息。 +- PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息(注:kernel的fileName为'[kernel]')。 - PmuAttr.symbolMode如果等于RESOLVE_ELF_DWARF,那么Symbol的所有信息都有效。 ### 采集uncore事件 diff --git a/docs/Go_API.md b/docs/Go_API.md index fa1ba8f..de71c2f 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -340,7 +340,7 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns @@ -395,7 +395,7 @@ import "libkperf/kperf" import "fmt" func main() { - clusterId := uint(1) + clusterId := uint(1) coreList, err := kperf.PmuGetClusterCore(clusterId) if err != nil { fmt.Printf("kperf PmuGetClusterCore failed, expect err is nil, but is %v\n", err) @@ -419,7 +419,7 @@ import "libkperf/kperf" import "fmt" func main() { - nodeId := uint(0) + nodeId := uint(0) coreList, err := kperf.PmuGetNumaCore(nodeId) if err != nil { fmt.Printf("kperf PmuGetNumaCore failed, expect err is nil, but is %v\n", err) @@ -436,14 +436,14 @@ func main() { func PmuGetCpuFreq(core uint) (int64, error) 查询当前系统指定core的实时CPU频率 * core cpu coreId -* 返回值为int64, 时当前cpu core的实时频率,出现错误频率为-1,且error不为空 +* 返回值为int64, 为当前cpu core的实时频率,出现错误频率为-1,且error不为空 ```go import "libkperf/kperf" import "fmt" func main() { - coreId := uint(0) + coreId := uint(0) freq, err := kperf.PmuGetCpuFreq(coreId) if err != nil { fmt.Printf("kperf PmuGetCpuFreq failed, expect err is nil, but is %v\n", err) diff --git 
a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index 2f55951..e64ea6a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -22,7 +22,7 @@ func TestCount(t *testing.T) { } for _, o := range dataVo.GoData { - t.Logf("================================Get Couting data success================================") + t.Logf("================================Get Counting data success================================") t.Logf("count base info comm=%v, evt=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, sockedId=%v", o.Comm, o.Evt, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) t.Logf("count info count=%v, countPercent=%v", o.Count, o.CountPercent) } -- Gitee From a18ea996500e9fa52deaac9f7fd39fa525fb050b Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Tue, 29 Apr 2025 10:28:48 +0800 Subject: [PATCH 06/48] =?UTF-8?q?=E9=87=8D=E7=BD=AElibsym=E5=BC=82?= =?UTF-8?q?=E5=B8=B8=E4=B8=BAwarning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/spe.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pmu/spe.cpp b/pmu/spe.cpp index a075bb8..0d1c7d9 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -362,9 +362,19 @@ void Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData if (header->type == PERF_RECORD_MMAP && symbolMode != NO_SYMBOL_RESOLVE) { struct PerfRecordMmap *sample = (struct PerfRecordMmap *)header; if (symbolMode == RESOLVE_ELF_DWARF) { - SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); + int ret = SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); + if (ret != SUCCESS) { + // if the module fails to be updated, a warning is recorded to overwrite the failure error code. 
+ SetWarn(ret, Perror()); + New(SUCCESS); + } } else if (symbolMode == RESOLVE_ELF) { - SymResolverUpdateModuleNoDwarf(sample->tid, sample->filename, sample->addr); + int ret = SymResolverUpdateModuleNoDwarf(sample->tid, sample->filename, sample->addr); + if (ret != SUCCESS) { + // if the module fails to be updated, a warning is recorded to overwrite the failure error code. + SetWarn(ret, Perror()); + New(SUCCESS); + } } dataTail += header->size; continue; -- Gitee From 204d6be61d64876523349ee376cf9da8c9aa6d23 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 7 May 2025 15:03:26 +0800 Subject: [PATCH 07/48] =?UTF-8?q?=E7=89=B9=E6=80=A7=E6=96=B0=E5=A2=9E:?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0cpu=E4=B8=BB=E9=A2=91=E7=9A=84=E9=87=87?= =?UTF-8?q?=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/src/libkperf/kperf/kperf.go | 55 +++++++++ go/src/libkperf_test/libkperf_test.go | 14 +++ include/pcerrc.h | 2 + include/pmu.h | 26 ++++ pmu/cpu_freq.cpp | 171 ++++++++++++++++++++++++++ pmu/cpu_freq.h | 66 ++++++++++ python/modules/_libkperf/Pmu.py | 136 ++++++++++++++++++++ python/modules/kperf/perror.py | 2 + python/modules/kperf/pmu.py | 16 +++ test/test_perf/test_api.cpp | 17 +++ 10 files changed, 505 insertions(+) create mode 100644 pmu/cpu_freq.cpp create mode 100644 pmu/cpu_freq.h diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 2b5958c..4511baa 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -413,6 +413,13 @@ type PmuDeviceDataVo struct { cDeviceData *C.struct_PmuDeviceData } +type PmuCpuFreqDetail struct { + CpuId int // core id + MinFreq uint64 // minimum frequency of core + MaxFreq uint64 // maximum frequency of core + AvgFreq uint64 // average frequency of core +} + // Initialize the collection target // On success, a task id is returned which is the unique identity for the task // On error, -1 is returned @@ -1059,6 +1066,54 
@@ func PmuGetCpuFreq(core uint) (int64, error) { return int64(freq), nil } + +// open cpu core freq sampling +// period unit ms +// return error or nil +func PmuOpenCpuFreqSampling(period uint) (error) { + c_period := C.uint32_t(period) + ret := C.PmuOpenCpuFreqSampling(c_period) + if int(ret) == -1 { + return errors.New(C.GoString(C.Perror())) + } + return nil +} + +// close cpu freq sampling +func PmuCloseCpuFreqSampling() { + C.PmuCloseCpuFreqSampling() +} + +// get the maximum frequency,minimum frequency,and average frequency of each core +// param cpuNum +// return PmuCpuFreqDetail array +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) { + cpuNum := C.uint32_t(0) + cpuFreqList := C.PmuReadCpuFreqDetail(&cpuNum) + + if (uint32(cpuNum) == 0) { + return nil + } + + ptr := unsafe.Pointer(cpuFreqList) + slice := reflect.SliceHeader{ + Data: uintptr(ptr), + Len: int(cpuNum), + Cap: int(cpuNum), + } + + cCpuFreqList := *(*[]C.struct_PmuCpuFreqDetail)(unsafe.Pointer(&slice)) + goCpuFreqList := make([]PmuCpuFreqDetail, int(cpuNum)) + + for i, v := range cCpuFreqList { + goCpuFreqList[i].CpuId = int(v.cpuId) + goCpuFreqList[i].MinFreq = uint64(v.minFreq) + goCpuFreqList[i].MaxFreq = uint64(v.maxFreq) + goCpuFreqList[i].AvgFreq = uint64(v.avgFreq) + } + return goCpuFreqList +} + func transferCPmuDataToGoData(cPmuData *C.struct_PmuData, dataLen int, fd int) []PmuData { ptr := unsafe.Pointer(cPmuData) slice := reflect.SliceHeader { diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index e64ea6a..28848fe 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -273,3 +273,17 @@ func TestPmuGetNumaCore(t *testing.T) { t.Logf("coreId has:%v", v) } } + +func TestPmuGetCpuFreqDetail(t *testing.T) { + err := kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + t.Fatalf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for 
_, v := range freqList { + t.Logf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} diff --git a/include/pcerrc.h b/include/pcerrc.h index 3a71491..259befa 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -107,6 +107,8 @@ extern "C" { #define LIBPERF_ERR_CLUSTER_ID_OVERSIZE 1063 #define LIBPERF_ERR_INVALID_PMU_BDF_TYPE 1064 #define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 +#define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 +#define LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD 1067 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index af9bb2a..747259d 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -265,6 +265,13 @@ struct PmuTraceData { const char *comm; // process command }; +struct PmuCpuFreqDetail { + int cpuId; // cpu core id + uint64_t minFreq; // minimum frequency of core + uint64_t maxFreq; // maximum frequency of core + uint64_t avgFreq; // average frequency of core +}; + /** * @brief * Initialize the collection target. @@ -618,6 +625,25 @@ const char** PmuSysCallFuncList(unsigned *numFunc); */ int64_t PmuGetCpuFreq(unsigned core); +/** + * @brief get the maximum frequency,minimum frequency,and average frequency of each core + * @param cpuNum + * @return PmuCpuFreqDetail array of pointers + */ +struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); + +/** + * @brief open cpu core freq sampling + * @param time period unit ms + * @return -1 or 0 + */ +int PmuOpenCpuFreqSampling(unsigned period); + +/** + * @brief close cpu freq sampling + */ +void PmuCloseCpuFreqSampling(); + #pragma GCC visibility pop #ifdef __cplusplus } diff --git a/pmu/cpu_freq.cpp b/pmu/cpu_freq.cpp new file mode 100644 index 0000000..2d2b033 --- /dev/null +++ b/pmu/cpu_freq.cpp @@ -0,0 +1,171 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. 
+ * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Li + * Create: 2025-05-07 + * Description: sample cpu freq. + ******************************************************************************/ +#include "cpu_freq.h" +#include "pmu.h" +#include "pcerr.h" + +using namespace pcerr; + +CpuFreqManager* CpuFreqManager::instance = nullptr; +std::mutex CpuFreqManager::singleMutex; +std::mutex CpuFreqManager::initMutex; +std::vector CpuFreqManager::freqDetailList; +bool CpuFreqManager::hasInit = false; + +PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum) { + auto& ds = CpuFreqManager::GetCpuFreqDetail(); + *cpuNum = ds.size(); + return ds.data(); +} + +int PmuOpenCpuFreqSampling(unsigned period) { + return CpuFreqManager::GetInstance()->InitCpuFreqSampling(period); +} + +void PmuCloseCpuFreqSampling() { + CpuFreqManager::Clear(); +} + +void CpuFreqManager::Clear() { + std::lock_guard lock(singleMutex); + if (instance == nullptr) { + return; + } + delete instance; + instance = nullptr; +} + +CpuFreqManager* CpuFreqManager::GetInstance() { + if (instance == nullptr) { + std::lock_guard lock(singleMutex); + if(instance == nullptr) { + instance = new CpuFreqManager(); + } + } + return instance; +} + +int CpuFreqManager::CheckCpuFreqIsExist() { + for(int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + int64_t freq = PmuGetCpuFreq(cpuId); + if (freq == -1 ) { + return -1; + } + } + return 0; +} + +int CpuFreqManager::CheckSleepPeriod(unsigned period) { + if (period == 0 || period > 10000) { + New(LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD, 
"invalid period, the period must be less than 10000ms and greater than 0ms"); + return LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD; + } + return SUCCESS; +} + +int CpuFreqManager::InitCpuFreqSampling(unsigned period) { + this->isEnable = true; + if (hasInit) { + return 0; + } + std::lock_guard lock(initMutex); + + if (CheckSleepPeriod(period) != 0) { + return -1; + } + + if (CheckCpuFreqIsExist() != 0) { + return -1; + } + + this->sleepPeriod = static_cast(period) / 1000; + this->cpuFreqThread = std::thread([this]() { + while (!isEnd) { + if (!isEnable) { + continue; + } + std::lock_guard lock(mapMutex); + for (int cpu = 0; cpu < MAX_CPU_NUM; cpu++) { + int64_t freq = PmuGetCpuFreq(cpu); + if (freq == -1) { + continue; + } + if (this->freqListMap.find(cpu) != this->freqListMap.end()) { + this->freqListMap[cpu].push_back(freq); + } else { + std::vector freqList = {freq}; + this->freqListMap.insert({cpu, freqList}); + } + } + sleep(this->sleepPeriod); + } + }); + hasInit = true; + return 0; +} + +void CpuFreqManager::CalFreqDetail() { + isEnable = false; + std::lock_guard lock(mapMutex); + + if(!this->freqListMap.empty()) { + uint64_t maxFreq, minFreq, sumFreq; + for (int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + std::vector freqList; + minFreq = 0; + maxFreq = 0; + sumFreq = 0; + if (this->freqListMap.find(cpuId) != this->freqListMap.end()) { + minFreq = UINT64_MAX; + freqList = freqListMap[cpuId]; + } + for (const auto& curFreq: freqList) { + minFreq = minFreq > curFreq ? curFreq : minFreq; + maxFreq = maxFreq > curFreq ? 
maxFreq : curFreq; + sumFreq += curFreq; + } + uint64_t avgFreq = sumFreq / freqList.size(); + PmuCpuFreqDetail detail = {.cpuId=cpuId, .minFreq=minFreq, .maxFreq=maxFreq, .avgFreq=avgFreq}; + freqDetailList.push_back(detail); + } + freqListMap.clear(); + } else { + GetCurFreqDetail(); + } + + isEnable = true; +} + +void CpuFreqManager::GetCurFreqDetail() { + for(int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + uint64_t freq = PmuGetCpuFreq(cpuId); + if (freq == -1) { + freq = 0; + } + PmuCpuFreqDetail detail = {.cpuId=cpuId, .minFreq=freq, .maxFreq=freq, .avgFreq=freq}; + freqDetailList.push_back(detail); + } +} + +std::vector& CpuFreqManager::GetCpuFreqDetail() { + std::lock_guard lock(initMutex); + freqDetailList.clear(); + if (!hasInit) { + CpuFreqManager::GetCurFreqDetail(); + } else { + CpuFreqManager::GetInstance()->CalFreqDetail(); + } + return freqDetailList; +} \ No newline at end of file diff --git a/pmu/cpu_freq.h b/pmu/cpu_freq.h new file mode 100644 index 0000000..e6aaf96 --- /dev/null +++ b/pmu/cpu_freq.h @@ -0,0 +1,66 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Li + * Create: 2025-05-07 + * Description: sample cpu freq. 
+ ******************************************************************************/ +#ifndef LIBKPERF_CPU_FREQ_H +#define LIBKPERF_CPU_FREQ_H + +#include +#include +#include +#include +#include + +#include "cpu_map.h" + +class CpuFreqManager { +public: + CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(0.1) {}; + ~CpuFreqManager() { + std::lock_guard lock(initMutex); + if(!hasInit) { + return; + } + isEnable = false; + isEnd = true; + cpuFreqThread.join(); + hasInit = false; + } + static void Clear(); + static CpuFreqManager* GetInstance(); + static std::vector& GetCpuFreqDetail(); + static void GetCurFreqDetail(); + + int InitCpuFreqSampling(unsigned period); + void CalFreqDetail(); + +private: + static CpuFreqManager* instance; + static std::mutex singleMutex; + static std::mutex initMutex; + static std::vector freqDetailList; + static bool hasInit; + + std::mutex mapMutex; + std::thread cpuFreqThread; + volatile bool isEnable; + volatile bool isEnd; + double sleepPeriod; + std::map> freqListMap; + + int CheckCpuFreqIsExist(); + static int CheckSleepPeriod(unsigned period); +}; + + +#endif //LIBKPERF_CPU_FREQ_H \ No newline at end of file diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 74f2c9d..229edc1 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1857,6 +1857,138 @@ def PmuSysCallFuncListFree() -> None: c_PmuSysCallFuncListFree() +class CtypesPmuCpuFreqDetail(ctypes.Structure): + """ + struct PmuCpuFreqDetail { + int cpuId; // cpu core id + uint64_t minFreq; // minimum frequency of core + uint64_t maxFreq; // maximum frequency of core + uint64_t avgFreq; // average frequency of core + } + """ + _fields_ = [ + ('cpuId', ctypes.c_int), + ('minFreq', ctypes.c_uint64), + ('maxFreq', ctypes.c_uint64), + ('avgFreq', ctypes.c_uint64), + ] + + def __init__(self, + cpuId: int = 0, + minFreq: int = 0, + maxFreq: int = 0, + avgFreq: int = 0, + *args:Any, **kw: Any) -> None: + 
super().__init__(*args, **kw) + self.cpuId = ctypes.c_int(cpuId) + self.minFreq = ctypes.c_uint64(minFreq) + self.maxFreq = ctypes.c_uint64(maxFreq) + self.avgFreq = ctypes.c_uint64(avgFreq) + + +class ImplPmuCpuFreqDetail: + __slots__ = ['__c_pmu_cpu_freq_detail'] + def __init__(self, + cpuId: int = 0, + minFreq: int = 0, + maxFreq: int = 0, + avgFreq: int = 0, + *args:Any, **kw: Any) -> None: + self.__c_pmu_cpu_freq_detail = CtypesPmuCpuFreqDetail( + cpuId=cpuId, + minFreq=minFreq, + maxFreq=maxFreq, + avgFreq=avgFreq + ) + + @property + def c_pmu_cpu_freq_detail(self) -> CtypesPmuCpuFreqDetail: + return self.__c_pmu_cpu_freq_detail + + @property + def cpuId(self) -> int: + return self.__c_pmu_cpu_freq_detail.cpuId + + @cpuId.setter + def cpuId(self, cpuId: int) -> None: + self.__c_pmu_cpu_freq_detail.cpuId = ctypes.c_int(cpuId) + + @property + def minFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.minFreq + + @minFreq.setter + def minFreq(self, minFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.minFreq = ctypes.c_uint64(minFreq) + + @property + def maxFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.maxFreq + + @maxFreq.setter + def maxFreq(self, maxFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.maxFreq = ctypes.c_uint64(maxFreq) + + @property + def avgFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.avgFreq + + @avgFreq.setter + def avgFreq(self, avgFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.avgFreq = ctypes.c_uint64(avgFreq) + + @classmethod + def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail: CtypesPmuCpuFreqDetail) -> 'ImplPmuCpuFreqDetail': + freq_detail = cls() + freq_detail.__c_pmu_cpu_freq_detail = c_pmu_cpu_freq_detail + return freq_detail + + +class PmuCpuFreqDetail: + __slots__ = ['__pointer', '__iter', '__len'] + + def __init__(self, pointer: ctypes.POINTER(CtypesPmuCpuFreqDetail) = None, len: int = 0) -> None: + self.__pointer = pointer + self.__len = len + self.__iter = 
(ImplPmuCpuFreqDetail.from_c_pmu_cpu_freq_detail(self.__pointer[i]) for i in range(self.__len)) + + @property + def len(self) -> int: + return self.__len + + @property + def iter(self) -> Iterator[ImplPmuCpuFreqDetail]: + return self.__iter + + +def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: + """ + struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); + """ + c_PmuGetCpuFreqDetail = kperf_so.PmuReadCpuFreqDetail + c_PmuGetCpuFreqDetail.argtypes = [] + c_PmuGetCpuFreqDetail.restype = ctypes.POINTER(CtypesPmuCpuFreqDetail) + c_cpu_len = ctypes.c_uint(0) + c_freq_detail_pointer = c_PmuGetCpuFreqDetail(ctypes.byref(c_cpu_len)) + + return PmuCpuFreqDetail(c_freq_detail_pointer, c_cpu_len.value) + +def PmuOpenCpuFreqSampling(period: int) -> None: + """ + int PmuOpenCpuFreqSampling(unsigned period); + """ + c_PmuOpenCpuFreqSampling = kperf_so.PmuOpenCpuFreqSampling + + c_period = ctypes.c_uint(period) + return c_PmuOpenCpuFreqSampling(c_period) + +def PmuCloseCpuFreqSampling() -> None: + """ + void PmuCloseCpuFreqSampling(); + """ + c_PmuCloseCpuFreqSampling = kperf_so.PmuCloseCpuFreqSampling + c_PmuCloseCpuFreqSampling() + __all__ = [ 'CtypesEvtAttr', @@ -1903,4 +2035,8 @@ __all__ = [ 'PmuTraceDataFree', 'PmuSysCallFuncList', 'PmuSysCallFuncListFree', + 'PmuOpenCpuFreqSampling', + 'PmuReadCpuFreqDetail', + 'PmuCloseCpuFreqSampling', + 'PmuCpuFreqDetail', ] diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index f8dcb9c..4ba96b2 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -106,6 +106,8 @@ class Error: LIBPERF_ERR_CLUSTER_ID_OVERSIZE = 1063 LIBPERF_ERR_INVALID_PMU_BDF_TYPE = 1064 LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 + LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 + LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD = 1067 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 975b122..90aeaf9 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py 
@@ -526,6 +526,18 @@ def sys_call_func_list() -> Iterator[str]: """ return _libkperf.PmuSysCallFuncList() +class CpuFreqDetail(_libkperf.PmuCpuFreqDetail): + pass + +def open_cpu_freq_sampling(period: int) -> None: + return _libkperf.PmuOpenCpuFreqSampling(period) + +def close_cpu_freq_sampling() -> None: + return _libkperf.PmuCloseCpuFreqSampling() + +def read_cpu_freq_detail() -> CpuFreqDetail: + return _libkperf.PmuReadCpuFreqDetail() + __all__ = [ 'PmuTaskType', 'PmuEventType', @@ -572,4 +584,8 @@ __all__ = [ 'trace_close', 'sys_call_func_list', 'BranchSampleFilter', + 'CpuFreqDetail', + 'open_cpu_freq_sampling', + 'close_cpu_freq_sampling', + 'read_cpu_freq_detail', ] diff --git a/test/test_perf/test_api.cpp b/test/test_perf/test_api.cpp index 6ae2eea..edb4311 100644 --- a/test/test_perf/test_api.cpp +++ b/test/test_perf/test_api.cpp @@ -17,6 +17,7 @@ #include "util_time.h" #include "process_map.h" #include "common.h" +#include "cpu_map.h" #include "test_common.h" using namespace std; @@ -670,3 +671,19 @@ TEST_F(TestAPI, TestBrBeBadMode) { ASSERT_EQ(pd, -1); ASSERT_EQ(Perrorno(), LIBPERF_ERR_BRANCH_JUST_SUPPORT_SAMPLING); } + +TEST_F(TestAPI, TestCpuFreqSampling) { + int ret = PmuOpenCpuFreqSampling(100); + ASSERT_NE(ret, -1); + PmuCloseCpuFreqSampling(); + + unsigned cpuNum = 0; + PmuCpuFreqDetail* pDetail1 = PmuReadCpuFreqDetail(&cpuNum); + ASSERT_EQ(cpuNum, MAX_CPU_NUM); + ret = PmuOpenCpuFreqSampling(100); + ASSERT_NE(ret, -1); + sleep(2); + PmuCpuFreqDetail* pDetail2 = PmuReadCpuFreqDetail(&cpuNum); + ASSERT_EQ(cpuNum, MAX_CPU_NUM); + PmuCloseCpuFreqSampling(); +} \ No newline at end of file -- Gitee From 4acbb6d98ee0d60748053ae8aec3af3ecab14751 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 19 May 2025 09:25:21 +0800 Subject: [PATCH 08/48] collect ddr bandwidth by channel --- go/src/libkperf/kperf/kperf.go | 19 +++++++ include/pmu.h | 9 +++- pmu/pmu_metric.cpp | 95 +++++++++++++++++++++++++++++++-- python/modules/_libkperf/Pmu.py | 
49 ++++++++++++++++- python/modules/kperf/pmu.py | 7 +++ python/tests/test_metric.py | 30 +++-------- test/test_perf/test_metric.cpp | 53 ++++-------------- 7 files changed, 188 insertions(+), 74 deletions(-) diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 4511baa..99fb52d 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -37,6 +37,9 @@ struct MetricDataExt { unsigned coreId; unsigned clusterId; char* bdf; + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; }; void SetPeriod(struct PmuAttr* attr, unsigned period) { @@ -122,6 +125,11 @@ void IPmuGetMetricDataExt(struct PmuDeviceData* deviceData, struct MetricDataExt case PMU_METRIC_CLUSTER: metricData->clusterId = deviceData->clusterId; break; + case PMU_METRIC_CHANNEL: + metricData->channelId = deviceData->channelId; + metricData->ddrNumaId = deviceData->ddrNumaId; + metricData->socketId = deviceData->socketId; + break; } } @@ -291,6 +299,7 @@ var ( PMU_METRIC_NUMA C.enum_PmuMetricMode = C.PMU_METRIC_NUMA PMU_METRIC_CLUSTER C.enum_PmuMetricMode = C.PMU_METRIC_CLUSTER PMU_METRIC_BDF C.enum_PmuMetricMode = C.PMU_METRIC_BDF + PMU_METRIC_CHANNEL C.enum_PmuMetricMode = C.PMU_METRIC_CHANNEL ) var fdModeMap map[int]C.enum_PmuTaskType = make(map[int]C.enum_PmuTaskType) @@ -396,6 +405,12 @@ type PmuDeviceAttr struct { Bdf string } +type DdrDataStructure struct { + ChannelId uint32 + DdrNumaId uint32 + SocketId uint32 +} + type PmuDeviceData struct { Metric C.enum_PmuDeviceMetric // The metric value. The meaning of value depends on metric type. 
@@ -406,6 +421,7 @@ type PmuDeviceData struct { NumaId uint32 // for pernuma metric ClusterId uint32 // for percluster metric Bdf string // for perpcie metric + DdrDataStructure // for perchannel metric } type PmuDeviceDataVo struct { @@ -990,6 +1006,9 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat goDeviceList[i].NumaId = uint32(metricDataExt.numaId) goDeviceList[i].ClusterId = uint32(metricDataExt.clusterId) goDeviceList[i].Bdf = C.GoString(metricDataExt.bdf) + goDeviceList[i].ChannelId = uint32(metricDataExt.channelId) + goDeviceList[i].DdrNumaId = uint32(metricDataExt.ddrNumaId) + goDeviceList[i].SocketId = uint32(metricDataExt.socketId) } res.GoDeviceData = goDeviceList res.cDeviceData = metricData diff --git a/include/pmu.h b/include/pmu.h index 747259d..d3af702 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -470,7 +470,8 @@ enum PmuMetricMode { PMU_METRIC_CORE, PMU_METRIC_NUMA, PMU_METRIC_CLUSTER, - PMU_METRIC_BDF + PMU_METRIC_BDF, + PMU_METRIC_CHANNEL, }; /** @@ -509,6 +510,12 @@ struct PmuDeviceData { unsigned clusterId; // for perpcie metric char *bdf; + // for perchannel metric of ddr + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index eedf8dd..c7bdde2 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -86,8 +87,9 @@ namespace KUNPENG_PMU { }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW, PMU_L3_LAT}; + set pernumaMetric = {PMU_L3_LAT}; set perClusterMetric = {PMU_L3_LAT}; + set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, PMU_PCIE_RX_MWR_BW, PMU_PCIE_TX_MRD_BW, @@ -888,6 +890,11 @@ namespace KUNPENG_PMU { unsigned numaId; unsigned clusterId; char *bdf; + struct { + unsigned channelId; + unsigned ddrNumaId; + 
unsigned socketId; + }; }; }; @@ -936,7 +943,7 @@ namespace KUNPENG_PMU { switch(metric) { case PMU_DDR_READ_BW: case PMU_DDR_WRITE_BW: - return PMU_METRIC_NUMA; + return PMU_METRIC_CHANNEL; case PMU_L3_LAT: return PMU_METRIC_CLUSTER; case PMU_L3_TRAFFIC: @@ -1064,6 +1071,82 @@ namespace KUNPENG_PMU { return SUCCESS; } + //920B: Ch0~3 -> ddrc0/2/3/5 TB; Ch4~7 -> ddrc0/2/3/5 TA + //920, 920C/E: Ch0~3 -> ddrc0/1/2/3 TB; Ch4~7 -> ddrc0/1/2/3 TA + static unordered_map> DDRC_CHANNEL_MAP = { + {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, + {CHIP_TYPE::HIPB, {0, 2, 3, 5}} + }; + + static void getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + { + string devName; + string evtName; + GetDeviceName(evt, devName, evtName); + // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 + string ddrcStr = "ddrc"; + size_t ddrcPos = devName.find(ddrcStr); + size_t channelIndex = ddrcPos + ddrcStr.length(); + string ddrcIndexStr = devName.substr(channelIndex); + // find index in DDRC_CHANNEL_MAP. eg: 3_1 --> 3, corresponds to channel 2 in HIPB + size_t separatorPos = ddrcIndexStr.find("_"); + int ddrcIndex = separatorPos != string::npos ? 
stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + + unsigned channelAddNum = 0; + if((ddrNumaId & 1) == 1) { // die B, + channelAddNum = 4; + } + CHIP_TYPE chipType = GetCpuType(); //get channel index + auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; + auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); + if (it != ddrcChannelList.end()) { + size_t index = distance(ddrcChannelList.begin(), it); + channelId = index + channelAddNum; + } + } + + struct channelKeyHash { + size_t operator()(const tuple& key) const { + auto channelIdHash = hash{}(get<0>(key)); + auto ddrNumaIdHash = hash{}(get<1>(key)); + auto socketIdHash = hash{}(get<2>(key)); + return channelIdHash ^ (ddrNumaIdHash << 1) ^ (socketIdHash << 2); + } + }; + + int AggregateByChannel(const PmuDeviceMetric metric, const vector &rawData, vector &devData) + { + unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: channelId, ddrNumaId, socketId + for (auto &data : rawData) { + unsigned channelId; + getChannelId(data.evtName, data.ddrNumaId, channelId); + auto ddrDatakey = make_tuple(channelId, data.ddrNumaId, data.socketId); + auto findData = devDataByChannel.find(ddrDatakey); + if (findData == devDataByChannel.end()) { + PmuDeviceData outData; + outData.metric = data.metric; + outData.count = data.count; + outData.mode = GetMetricMode(data.metric); + outData.channelId = channelId; + outData.ddrNumaId = data.ddrNumaId; + outData.socketId = data.ddrNumaId < 2 ? 
0 : 1; // numa id 0-1 --> socket id 0; numa id 2-3 --> socket id 1 + devDataByChannel[ddrDatakey] = outData; + } else { + findData->second.count += data.count; + } + } + + vector, PmuDeviceData>> sortedVec(devDataByChannel.begin(), devDataByChannel.end()); + sort(sortedVec.begin(), sortedVec.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); + for (auto &data : sortedVec) { + devData.push_back(data.second); + } + + return SUCCESS; + } + int PcieBWAggregate(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { const auto& deviceConfig = GetDeviceMtricConfig(); @@ -1146,8 +1229,8 @@ namespace KUNPENG_PMU { {PMU_DDR_WRITE_BW, DDRBw}, {PMU_L3_TRAFFIC, L3Bw}}; map aggregateMap = { - {PMU_DDR_READ_BW, AggregateByNuma}, - {PMU_DDR_WRITE_BW, AggregateByNuma}, + {PMU_DDR_READ_BW, AggregateByChannel}, + {PMU_DDR_WRITE_BW, AggregateByChannel}, {PMU_L3_LAT, AggregateByCluster}, {PMU_PCIE_RX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_RX_MWR_BW, PcieBWAggregate}, @@ -1256,6 +1339,10 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } + if (perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + devData.ddrNumaId = pmuData[i].cpuTopo->numaId; + devData.socketId = pmuData[i].cpuTopo->socketId; + } if (IsBdfMetric(devAttr.metric)) { devData.bdf = devAttr.bdf; } diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 229edc1..ef6147c 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -467,6 +467,12 @@ class PmuDeviceAttr: pmu_device_attr.__c_pmu_device_attr = c_pmu_device_attr return pmu_device_attr +class DdrDataStructure(ctypes.Structure): + _fields_ = [ + ('channelId', ctypes.c_uint), + ('ddrNumaId', ctypes.c_uint), + ('socketId', ctypes.c_uint) + ] class CtypesPmuDeviceData(ctypes.Structure): """ @@ -479,6 +485,11 @@ class CtypesPmuDeviceData(ctypes.Structure): 
unsigned numaId; unsigned clusterId; char *bdf; + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; """ @@ -487,7 +498,8 @@ class CtypesPmuDeviceData(ctypes.Structure): ('coreId', ctypes.c_uint), ('numaId', ctypes.c_uint), ('clusterId', ctypes.c_uint), - ('bdf', ctypes.c_char_p) + ('bdf', ctypes.c_char_p), + ('_structure', DdrDataStructure) ] _fields_ = [ @@ -521,6 +533,23 @@ class CtypesPmuDeviceData(ctypes.Structure): return self._union.bdf.decode(UTF_8) return "" + @property + def channelId(self) -> int: + if self.mode == 5 and self._union._structure.channelId: # PMU_METRIC_CHANNEL + return self._union._structure.channelId + return 0 + + @property + def ddrNumaId(self) -> int: + if self.mode == 5 and self._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL + return self._union._structure.ddrNumaId + return 0 + + @property + def socketId(self) -> int: + if self.mode == 5 and self._union._structure.socketId: # PMU_METRIC_CHANNEL + return self._union._structure.socketId + return 0 class ImplPmuDeviceData: __slots__ = ['__c_pmu_device_data'] @@ -574,6 +603,24 @@ class ImplPmuDeviceData: return self.c_pmu_device_data._union.bdf.decode(UTF_8) return "" + @property + def channelId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.channelId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.channelId + return 0 + + @property + def ddrNumaId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.ddrNumaId + return 0 + + @property + def socketId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.socketId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.socketId + return 0 + @classmethod def from_c_pmu_device_data(cls, c_pmu_device_data: CtypesPmuDeviceData) -> 'ImplPmuDeviceData': pmu_device_data = cls() diff --git 
a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 90aeaf9..672a060 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -173,6 +173,7 @@ class PmuMetricMode: PMU_METRIC_NUMA = 2 PMU_METRIC_CLUSTER = 3 PMU_METRIC_BDF = 4 + PMU_METRIC_CHANNEL = 5 class ImplPmuDeviceData(_libkperf.ImplPmuDeviceData): pass @@ -193,6 +194,12 @@ class PmuDeviceData(_libkperf.PmuDeviceData): unsigned numaId; // for perpcie metric char *bdf; + // for perchannel metric of ddr + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; """ diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index 4a878ae..bf653ce 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -112,7 +112,8 @@ def test_get_numa_cores(): def test_collect_ddr_bandwidth(): dev_attr = [ - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW) + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) ] pd = kperf.device_open(dev_attr) print(kperf.error()) @@ -125,9 +126,10 @@ def test_collect_ddr_bandwidth(): dev_data = None dev_data = kperf.get_device_metric(ori_data, dev_attr) - assert len(dev_data) == 4 - assert dev_data[0].numaId == 0 - assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[0].count != 0 + assert dev_data[0].metric == kperf.PmuDeviceMetric.PMU_DDR_READ_BW + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_CHANNEL + assert dev_data[len(dev_data) - 1].metric == kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW print_dev_data_details(dev_data) kperf.close(pd) @@ -150,26 +152,6 @@ def test_collect_l3_latency(): print_dev_data_details(dev_data) kperf.close(pd) -def test_collect_l3_latency_and_ddr(): - dev_attr = [ - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_LAT), - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) - ] - pd = kperf.device_open(dev_attr) - 
print(kperf.error()) - assert pd != -1, f"Expected non-negative pd, but got {pd}" - kperf.enable(pd) - time.sleep(1) - kperf.disable(pd) - ori_data = kperf.read(pd) - assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" - - dev_data = kperf.get_device_metric(ori_data, dev_attr) - assert len(dev_data) == get_cluster_nums() + 4 - print_dev_data_details(dev_data) - kperf.close(pd) - - def test_collect_l3_traffic(): dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index 56bee10..d10ca39 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -104,9 +104,10 @@ TEST_F(TestMetric, GetNumaIdList) TEST_F(TestMetric, CollectDDRBandwidth) { - PmuDeviceAttr devAttr = {}; - devAttr.metric = PMU_DDR_READ_BW; - int pd = PmuDeviceOpen(&devAttr, 1); + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = PMU_DDR_READ_BW; + devAttr[1].metric = PMU_DDR_WRITE_BW; + int pd = PmuDeviceOpen(devAttr, 2); cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); @@ -117,16 +118,11 @@ TEST_F(TestMetric, CollectDDRBandwidth) ASSERT_NE(oriLen, -1); PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, &devAttr, 1, &devData); - ASSERT_EQ(len, 4); - ASSERT_EQ(devData[0].numaId, 0); - ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[1].numaId, 1); - ASSERT_EQ(devData[1].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[2].numaId, 2); - ASSERT_EQ(devData[2].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[3].numaId, 3); - ASSERT_EQ(devData[3].mode, PMU_METRIC_NUMA); + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + ASSERT_NE(devData[0].count, 0); + ASSERT_EQ(devData[0].mode, PMU_METRIC_CHANNEL); + ASSERT_EQ(devData[0].metric, PMU_DDR_READ_BW); + ASSERT_EQ(devData[len - 1].metric, PMU_DDR_WRITE_BW); DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); @@ -160,37 +156,6 @@ 
TEST_F(TestMetric, CollectL3Latency) PmuClose(pd); } -TEST_F(TestMetric, CollectL3LatencyAndDDR) -{ - PmuDeviceAttr devAttr[2] = {}; - devAttr[0].metric = PMU_L3_LAT; - devAttr[1].metric = PMU_DDR_WRITE_BW; - - int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; - ASSERT_NE(pd, -1); - PmuEnable(pd); - sleep(1); - PmuDisable(pd); - PmuData* oriData = nullptr; - int oriLen = PmuRead(pd, &oriData); - ASSERT_NE(oriLen, -1); - - PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); - unsigned clusterCount = GetClusterCount(); - unsigned numaCount = GetNumaNodeCount(); - ASSERT_EQ(len, clusterCount + numaCount); - ASSERT_NE(devData[0].count, 0); - ASSERT_EQ(devData[0].metric, PMU_L3_LAT); - ASSERT_EQ(devData[0].mode, PMU_METRIC_CLUSTER); - ASSERT_EQ(devData[clusterCount].metric, PMU_DDR_WRITE_BW); - ASSERT_EQ(devData[clusterCount].mode, PMU_METRIC_NUMA); - DevDataFree(devData); - PmuDataFree(oriData); - PmuClose(pd); -} - TEST_F(TestMetric, CollectL3Traffic) { PmuDeviceAttr devAttr = {}; -- Gitee From aeed6454790fdd0cfd277c3d94e95190cc7da7cf Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 19 May 2025 09:33:09 +0800 Subject: [PATCH 09/48] collect ddr bandwidth by channel --- include/pmu.h | 2 +- pmu/pmu_metric.cpp | 39 ++++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/pmu.h b/include/pmu.h index d3af702..5ec1726 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -471,7 +471,7 @@ enum PmuMetricMode { PMU_METRIC_NUMA, PMU_METRIC_CLUSTER, PMU_METRIC_BDF, - PMU_METRIC_CHANNEL, + PMU_METRIC_CHANNEL }; /** diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index c7bdde2..8fc5d12 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -107,7 +107,7 @@ namespace KUNPENG_PMU { if (it != MetricToString.end()) { return it->second; } - return ""; + return ""; } using PMU_METRIC_PAIR = std::pair; @@ -302,7 +302,7 @@ namespace KUNPENG_PMU 
{ { CHIP_TYPE chipType = GetCpuType(); if (UNCORE_METRIC_CONFIG_MAP.find(chipType) == UNCORE_METRIC_CONFIG_MAP.end()) { - return {}; + return {}; } return UNCORE_METRIC_CONFIG_MAP.at(chipType); } @@ -1071,18 +1071,18 @@ namespace KUNPENG_PMU { return SUCCESS; } - //920B: Ch0~3 -> ddrc0/2/3/5 TB; Ch4~7 -> ddrc0/2/3/5 TA - //920, 920C/E: Ch0~3 -> ddrc0/1/2/3 TB; Ch4~7 -> ddrc0/1/2/3 TA static unordered_map> DDRC_CHANNEL_MAP = { {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, {CHIP_TYPE::HIPB, {0, 2, 3, 5}} }; - static void getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) { string devName; string evtName; - GetDeviceName(evt, devName, evtName); + if (!GetDeviceName(evt, devName, evtName)) { + return false; + } // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 string ddrcStr = "ddrc"; size_t ddrcPos = devName.find(ddrcStr); @@ -1093,34 +1093,41 @@ namespace KUNPENG_PMU { int ddrcIndex = separatorPos != string::npos ? 
stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); unsigned channelAddNum = 0; - if((ddrNumaId & 1) == 1) { // die B, + if((ddrNumaId & 1) == 1) { // channel id + 4 in sequence channelAddNum = 4; } CHIP_TYPE chipType = GetCpuType(); //get channel index + if (DDRC_CHANNEL_MAP.find(chipType) == DDRC_CHANNEL_MAP.end()) { + return false; + } auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); if (it != ddrcChannelList.end()) { size_t index = distance(ddrcChannelList.begin(), it); channelId = index + channelAddNum; + return true; } + return false; } struct channelKeyHash { size_t operator()(const tuple& key) const { - auto channelIdHash = hash{}(get<0>(key)); - auto ddrNumaIdHash = hash{}(get<1>(key)); - auto socketIdHash = hash{}(get<2>(key)); - return channelIdHash ^ (ddrNumaIdHash << 1) ^ (socketIdHash << 2); + auto socketIdHash = hash{}(get<0>(key)); + auto channelIdHash = hash{}(get<1>(key)); + auto ddrNumaIdHash = hash{}(get<2>(key)); + return socketIdHash ^ (channelIdHash << 1) ^ (ddrNumaIdHash << 2); } }; int AggregateByChannel(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { - unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: channelId, ddrNumaId, socketId + unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: socketId, channelId, ddrNumaId for (auto &data : rawData) { unsigned channelId; - getChannelId(data.evtName, data.ddrNumaId, channelId); - auto ddrDatakey = make_tuple(channelId, data.ddrNumaId, data.socketId); + if (!getChannelId(data.evtName, data.ddrNumaId, channelId)) { + continue; + } + auto ddrDatakey = make_tuple(data.socketId, channelId, data.ddrNumaId); auto findData = devDataByChannel.find(ddrDatakey); if (findData == devDataByChannel.end()) { PmuDeviceData outData; @@ -1137,7 +1144,9 @@ namespace KUNPENG_PMU { } vector, PmuDeviceData>> sortedVec(devDataByChannel.begin(), devDataByChannel.end()); - 
sort(sortedVec.begin(), sortedVec.end(), [](const auto& a, const auto& b) { + sort(sortedVec.begin(), sortedVec.end(), []( + const pair, PmuDeviceData>& a, + const pair, PmuDeviceData>& b) { return a.first < b.first; }); for (auto &data : sortedVec) { -- Gitee From edab059f4e4c18781d78ad25791a259ee7be306e Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 20 May 2025 09:14:07 +0800 Subject: [PATCH 10/48] Add docs: collect ddr bandwidth by channel --- docs/Details_Usage.md | 49 ++++++++++++++++++++++++++----------------- docs/Go_API.md | 8 ++++++- docs/Python_API.md | 7 ++++++- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 5c9f6e8..58f9a0f 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -600,7 +600,7 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) 注意,该功能是针对Counting模式,因为Sampling和SPE Sampling本身就会采集子线程的数据。 ### 采集DDRC带宽 -鲲鹏上提供了DDRC的pmu设备,用于采集DDR的性能数据,比如带宽等。libkperf提供了API,用于获取每个numa的DDR带宽数据。 +鲲鹏上提供了DDRC的pmu设备,用于采集DDR的性能数据,比如带宽等。libkperf提供了API,用于获取每个channel的DDR带宽数据。 参考代码: ```c++ @@ -620,15 +620,17 @@ PmuData *oriData = nullptr; int oriLen = PmuRead(pd, &oriData); PmuDeviceData *devData = nullptr; auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); -// 对于4个numa的服务器,devData的长度为8.前4个是读带宽,后4个是写带宽。 -for (int i=0;i<4;++i) { - // numaId表示数据对应的numa节点。 +// devData的长度为2 * n (总通道数)。前n个是读带宽,后n个是写带宽。 +for (int i = 0; i < len / 2; ++i) { + // socketId表示数据对应的socket节点。 + // ddrNumaId表示数据对应的numa节点。 + // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(" << devData[i].numaId << "): " << devData[i].count/1024/1024 << "M/s\n"; + cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } -for (int i=4;i<8;++i) { - cout << "write bandwidth(" << devData[i].numaId << "): 
" << devData[i].count/1024/1024 << "M/s\n"; +for (int i = len / 2; i < len; ++i) { + cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -649,9 +651,9 @@ ori_data = kperf.read(pd) dev_data = kperf.get_device_metric(ori_data, dev_attr) for data in dev_data.iter: if data.metric == kperf.PmuDeviceMetric.PMU_DDR_READ_BW: - print(f"read bandwidth({data.numaId}): {data.count/1024/1024} M/s") + print(f"read bandwidth(Socket: {data.socketId} Numa: {data.ddrNumaId} Channel: {data.channelId}): {data.count/1024/1024} M/s") if data.metric == kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW: - print(f"write bandwidth({data.numaId}): {data.count/1024/1024} M/s") + print(f"write bandwidth(Socket: {data.socketId} Numa: {data.ddrNumaId} Channel: {data.channelId}): {data.count/1024/1024} M/s") ``` ```go @@ -665,10 +667,10 @@ dataVo, _ := kperf.PmuRead(fd) deivceDataVo, _ := kperf.PmuGetDevMetric(dataVo, deviceAttrs) for _, v := range deivceDataVo.GoDeviceData { if v.Metric == kperf.PMU_DDR_READ_BW { - fmt.Printf("read bandwidth(%v): %v M/s\n", v.NumaId, v.Count/1024/1024) + fmt.Printf("read bandwidth(Socket: %v Numa: %v Channel: %v): %v M/s\n", v.SocketId, v.DdrNumaId, v.ChannelId, v.Count/1024/1024) } if v.Metric == kperf.PMU_DDR_WRITE_BW { - fmt.Printf("write bandwidth(%v): %v M/s\n", v.NumaId, v.Count/1024/1024) + fmt.Printf("write bandwidth(Socket: %v Numa: %v Channel: %v): %v M/s\n", v.SocketId, v.DdrNumaId, v.ChannelId, v.Count/1024/1024) } } kperf.DevDataFree(deivceDataVo) @@ -678,14 +680,23 @@ kperf.PmuClose(fd) 执行上述代码,输出的结果类似如下: ``` -read bandwidth(0): 17.32 M/s -read bandwidth(1): 5.43 M/s -read bandwidth(2): 2.83 M/s -read bandwidth(3): 4.09 M/s -write bandwidth(0): 4.35 M/s -write bandwidth(1): 2.29 M/s -write bandwidth(2): 0.84 M/s -write bandwidth(3): 0.97 M/s +read bandwidth(Socket: 0 Numa: 0 
Channel: 0): 6.08 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 1): 5.66 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 2): 6.23 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 3): 5.30 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 4): 4.21 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 5): 4.06 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 6): 3.99 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 7): 3.89 M/s +... +write bandwidth(Socket: 1 Numa: 2 Channel: 1): 1.49 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 2): 1.44 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 3): 1.39 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 4): 1.22 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 4): 1.44 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 5): 1.43 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 6): 1.40 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 7): 1.38 M/s ``` ### 采集L3 cache的时延 diff --git a/docs/Go_API.md b/docs/Go_API.md index de71c2f..6359685 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -370,14 +370,20 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat * []PmuDeviceAttr: 指定需要聚合的指标参数 * typ PmuDeviceDataVo struct: * GoDeviceData []PmuDeviceData +* type DdrDataStructure struct { + ChannelId uint32 ddr数据的channel编号 + DdrNumaId uint32 ddr数据的numa编号 + SocketId uint32 ddr数据的socket编号 + } * type PmuDeviceData struct: * Metric C.enum_PmuDeviceMetric 采集的指标 * Count float64 指标的计数值 - * Mode C.enum_PmuMetricMode 指标的采集类型,按core、按numa还是按bdf号 + * Mode C.enum_PmuMetricMode 指标的采集类型,按core、按numa、按channel还是按bdf号 * CoreId uint32 数据的core编号 * NumaId uint32 数据的numa编号 * ClusterId uint32 簇ID * Bdf string 数据的bdf编号 + * DdrDataStructure ddr相关的统计数据 ### kperf.DevDataFree diff --git a/docs/Python_API.md b/docs/Python_API.md index 1ed8766..3234f77 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -365,14 +365,19 @@ kperf.get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) 对 * len: 数据长度 * iter: 返回iterator[ImplPmuDeviceData] * 
free: 释放当前PmuDeviceData +* class DdrDataStructure: + * channelId: ddr数据的channel编号 + * ddrNumaId: ddr数据的numa编号 + * socketId: ddr数据的socket编号 * class ImplPmuDeviceData: * metric: 采集的指标 * count:指标的计数值 - * mode: 指标的采集类型,按core、按numa还是按bdf号 + * mode: 指标的采集类型,按core、按numa、按channel还是按bdf号 * union: * coreId: 数据的core编号 * numaId: 数据的numa编号 * bdf: 数据的bdf编号 + * DdrDataStructure: ddr相关的统计数据 ### kperf.device_bdf_list -- Gitee From c71a9dd5f8eabea85a4e55cae22b5027abbbb064 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 21 May 2025 14:38:19 +0800 Subject: [PATCH 11/48] =?UTF-8?q?uncore=E4=BA=8B=E4=BB=B6umask=E8=B4=9F?= =?UTF-8?q?=E6=95=B0=E6=83=85=E5=86=B5=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/cpu_freq.cpp | 4 ++-- pmu/cpu_freq.h | 4 ++-- pmu/pfm/uncore.cpp | 4 ++++ util/pcerr.cpp | 5 +++++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pmu/cpu_freq.cpp b/pmu/cpu_freq.cpp index 2d2b033..6f5a031 100644 --- a/pmu/cpu_freq.cpp +++ b/pmu/cpu_freq.cpp @@ -90,7 +90,7 @@ int CpuFreqManager::InitCpuFreqSampling(unsigned period) { return -1; } - this->sleepPeriod = static_cast(period) / 1000; + this->sleepPeriod = period * 1000; this->cpuFreqThread = std::thread([this]() { while (!isEnd) { if (!isEnable) { @@ -109,7 +109,7 @@ int CpuFreqManager::InitCpuFreqSampling(unsigned period) { this->freqListMap.insert({cpu, freqList}); } } - sleep(this->sleepPeriod); + usleep(this->sleepPeriod); } }); hasInit = true; diff --git a/pmu/cpu_freq.h b/pmu/cpu_freq.h index e6aaf96..0e23e44 100644 --- a/pmu/cpu_freq.h +++ b/pmu/cpu_freq.h @@ -25,7 +25,7 @@ class CpuFreqManager { public: - CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(0.1) {}; + CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(100) {}; ~CpuFreqManager() { std::lock_guard lock(initMutex); if(!hasInit) { @@ -55,7 +55,7 @@ private: std::thread cpuFreqThread; volatile bool isEnable; volatile bool isEnd; - 
double sleepPeriod; + unsigned int sleepPeriod; std::map> freqListMap; int CheckCpuFreqIsExist(); diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index e6fca1a..f02e079 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -61,6 +61,10 @@ static std::vector GetCpuMask(const string &devName) char maskStr[1024]; maskIn >> maskStr; + if (maskStr[0] == '0' || maskStr[0] == '-') { + return maskList; + } + char *tokStr = strtok(maskStr, ","); while (tokStr != nullptr) { if (strstr(tokStr, "-") != nullptr) { diff --git a/util/pcerr.cpp b/util/pcerr.cpp index 7d70a3e..3efc851 100644 --- a/util/pcerr.cpp +++ b/util/pcerr.cpp @@ -14,6 +14,7 @@ ******************************************************************************/ #include #include +#include #include "pcerrc.h" #include "pcerr.h" @@ -64,6 +65,8 @@ namespace pcerr { static std::string warnMsg = ""; static int errCode = SUCCESS; static std::string errMsg = ""; + static std::mutex errMutex; + static std::mutex warnMutex; static std::string GetCustomMsg(int code) { std::string msg; @@ -92,6 +95,7 @@ namespace pcerr { void New(int code, const std::string& msg) { + std::lock_guard lock(errMutex); errCode = code; errMsg = msg; } @@ -108,6 +112,7 @@ namespace pcerr { void SetWarn(int code, const std::string& msg) { + std::lock_guard lock(warnMutex); warnCode = code; warnMsg = msg; } -- Gitee From c652a50270c9a01b6044a2c24719af8ea733c55b Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Thu, 22 May 2025 14:50:35 +0800 Subject: [PATCH 12/48] corrected information and spelling errors --- README.en.md | 2 +- README.md | 2 +- docs/Details_Usage.md | 4 ++-- docs/Go_API.md | 4 ++-- docs/Python_API.md | 8 ++++---- pmu/pmu.cpp | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.en.md b/README.en.md index e1f8ee9..dbc30fd 100644 --- a/README.en.md +++ b/README.en.md @@ -56,7 +56,7 @@ Minimum required GCC version: Minimum required Python version: -- python-3.7. +- python-3.6. 
To build a library with C API: diff --git a/README.md b/README.md index 007a586..b3e124a 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ v1.0: - gcc-4.8.5 和 glibc-2.17 最低依赖python版本: -- python-3.7 +- python-3.6 编译生成动态库和C的API: ```shell diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 58f9a0f..fe89cc6 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -41,7 +41,7 @@ func main() { attr := kperf.PmuAttr{EvtList:[]string{"cycles", "branch-misses"}} pd, err := kperf.PmuOpen(kperf.COUNT, attr) if err != nil { - fmt.Printf("kperf pmuopen couting failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v", err) return } } @@ -331,7 +331,7 @@ func main() { attr := kperf.PmuAttr{EvtList:evtList} pd, err := kperf.PmuOpen(kperf.COUNT, attr) if err != nil { - fmt.Printf("kperf pmuopen couting failed, expect err is nil, but is %v\n", err) + fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v\n", err) return } } diff --git a/docs/Go_API.md b/docs/Go_API.md index 6359685..26ae464 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -335,8 +335,8 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * type PmuDeviceAttr struct: * Metic: 指定需要采集的指标 - * PMU_DDR_READ_BW 采集每个numa的ddrc的读带宽,单位:Bytes - * PMU_DDR_WRITE_BW 采集每个numa的ddrc的写带宽,单位:Bytes + * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes + * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count diff --git a/docs/Python_API.md b/docs/Python_API.md index 3234f77..ddf8d38 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -5,7 +5,7 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) * class PmuTaskType - * COUTING PMU计数模式 + * COUNTING PMU计数模式 * SAMPLING PMU采样模式 * SPE_SAMPLING SPE采样模式 * class PmuAttr @@ -51,7 +51,7 @@ kperf.open(collector_type: 
kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) * SPE_EVENT_MISPREDICTED = 0x80 # mispredict * minLatency 仅收集该latency或者更高的样本数据 * includeNewFork - 是否支持子线程拆分,仅在COUTING模式中支持 + 是否支持子线程拆分,仅在COUNTING模式中支持 * branchSampleFilter * KPERF_NO_BRANCH_SAMPLE = 0 不采集branch sample stack数据 * KPERF_SAMPLE_BRANCH_USER = 1 << 0 分支目标位于用户空间 @@ -330,8 +330,8 @@ for func_name in kperf.sys_call_func_list(): kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指标的能力 * class PmuDeviceAttr: * metic: 指定需要采集的指标 - * PMU_DDR_READ_BW 采集每个numa的ddrc的读带宽,单位:Bytes - * PMU_DDR_WRITE_BW 采集每个numa的ddrc的写带宽,单位:Bytes + * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes + * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index c033dfc..4d55b86 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -176,7 +176,7 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * } #ifdef IS_X86 if (collectType != COUNTING && collectType != SAMPLING) { - New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUTING mode and SMAPLING mode"); + New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUNTING mode and SAMPLING mode"); return LIBPERF_ERR_INVALID_TASK_TYPE; } #endif -- Gitee From db9ffd6a34e3de760e04bc3a963c44cb124cd582 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 22 May 2025 17:30:46 +0800 Subject: [PATCH 13/48] =?UTF-8?q?=E5=8E=BB=E9=99=A4umask=3D0=E6=97=B6?= =?UTF-8?q?=E7=9A=84=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pfm/uncore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index f02e079..6ae72cc 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -61,7 +61,7 @@ static std::vector GetCpuMask(const string &devName) 
char maskStr[1024]; maskIn >> maskStr; - if (maskStr[0] == '0' || maskStr[0] == '-') { + if (maskStr[0] == '-') { return maskList; } -- Gitee From 270164ae37f48a818c8a076bd22f959646949de4 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 23 May 2025 11:10:46 +0800 Subject: [PATCH 14/48] =?UTF-8?q?=E5=A2=9E=E5=8A=A0tracepointer=E5=88=A4?= =?UTF-8?q?=E6=96=AD,=E5=87=8F=E5=B0=91=E6=96=87=E4=BB=B6=E5=88=A4?= =?UTF-8?q?=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu.cpp | 6 +++--- pmu/pmu_list.cpp | 6 +++--- pmu/sampler.cpp | 7 +++++-- ...pointer_parser.cpp => trace_point_parser.cpp} | 16 ++++++++-------- ...ace_pointer_parser.h => trace_point_parser.h} | 12 ++++++------ 5 files changed, 25 insertions(+), 22 deletions(-) rename pmu/{trace_pointer_parser.cpp => trace_point_parser.cpp} (94%) rename pmu/{trace_pointer_parser.h => trace_point_parser.h} (89%) diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 4d55b86..1fd80ea 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -27,7 +27,7 @@ #include "pcerr.h" #include "safe_handler.h" #include "pmu_metric.h" -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" #include "pmu.h" using namespace pcerr; @@ -972,7 +972,7 @@ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *valu New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return LIBPERF_ERR_INVALID_FIELD_ARGS; } - return PointerPasser::ParsePointer(rawData->data, fieldName, value, vSize); + return TraceParser::ParseTraceData(rawData->data, fieldName, value, vSize); #endif } @@ -986,7 +986,7 @@ struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char return nullptr; } - SampleRawField *rt = PointerPasser::GetSampleRawField(rawData->data, fieldName); + SampleRawField *rt = TraceParser::GetSampleRawField(rawData->data, fieldName); if (rt) { New(SUCCESS); } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 
360b875..c0470c0 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -23,7 +23,7 @@ #include "pcerr.h" #include "util_time.h" #include "log.h" -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" #include "pmu_event_list.h" #include "pmu_list.h" #include "pfm_event.h" @@ -457,7 +457,7 @@ namespace KUNPENG_PMU { EraseParentEventMap(pd); SymResolverDestroy(); PmuEventListFree(); - PointerPasser::FreeRawFieldMap(); + TraceParser::FreeRawFieldMap(); } int PmuList::NewPd() @@ -757,7 +757,7 @@ namespace KUNPENG_PMU { for (auto pd: findData->second.data) { if (pd.rawData != nullptr) { - PointerPasser::FreePointerData(pd.rawData->data); + TraceParser::FreeTraceData(pd.rawData->data); free(pd.rawData); pd.rawData = nullptr; } diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 6cfd684..a12e709 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -31,7 +31,8 @@ #include "process_map.h" #include "log.h" #include "sampler.h" -#include "trace_pointer_parser.h" +#include "pfm_event.h" +#include "trace_point_parser.h" #include "common.h" using namespace std; @@ -239,7 +240,9 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( current->tid = static_cast(sample->tid); current->period = static_cast(sample->period); current->ts = static_cast(sample->time); - PointerPasser::ParserRawFormatData(current, sample, event, this->evt->name); + if (this->evt->pmuType == TRACE_TYPE) { + TraceParser::ParserRawFormatData(current, sample, event, this->evt->name); + } ParseBranchSampleData(current, sample, event, extPool); } diff --git a/pmu/trace_pointer_parser.cpp b/pmu/trace_point_parser.cpp similarity index 94% rename from pmu/trace_pointer_parser.cpp rename to pmu/trace_point_parser.cpp index 0e84fda..3e2b79b 100644 --- a/pmu/trace_pointer_parser.cpp +++ b/pmu/trace_point_parser.cpp @@ -13,7 +13,7 @@ * Description: Provides the capability of parsing pointer events. 
******************************************************************************/ -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" using namespace KUNPENG_PMU; using namespace pcerr; @@ -41,7 +41,7 @@ static std::string GetFormatRealPath(const std::string &evtName) { return GetRealPath(formatPath); } -bool PointerPasser::IsNeedFormat(std::ifstream &file, const std::string &evtName) { +bool TraceParser::IsNeedFormat(std::ifstream &file, const std::string &evtName) { std::string realPath; if (formatMap.find(evtName) != formatMap.end()) { realPath = formatMap.at(evtName); @@ -90,7 +90,7 @@ void ParseFormatFile(ifstream &file, const std::string &evtName) { efMap.insert({evtName, fnMap}); } -void PointerPasser::ParserRawFormatData(struct PmuData *pd, KUNPENG_PMU::PerfRawSample *sample, +void TraceParser::ParserRawFormatData(struct PmuData *pd, KUNPENG_PMU::PerfRawSample *sample, union KUNPENG_PMU::PerfEvent *event, const std::string &evtName) { ifstream file; @@ -171,7 +171,7 @@ int CheckFieldArgs(char *data, const string &fieldName, T *value, uint32_t vSize } template -int PointerPasser::ParseField(char *data, const std::string &fieldName, T *value, uint32_t vSize) { +int TraceParser::ParseField(char *data, const std::string &fieldName, T *value, uint32_t vSize) { int rt = CheckFieldArgs(data, fieldName, value, vSize); if (rt != SUCCESS) { return rt; @@ -201,12 +201,12 @@ int PointerPasser::ParseField(char *data, const std::string &fieldName, T *value return SUCCESS; } -int PointerPasser::ParsePointer(char *data, const std::string &fieldName, void *value, +int TraceParser::ParseTraceData(char *data, const std::string &fieldName, void *value, uint32_t vSize) { return ParseField(data, fieldName, value, vSize); } -void PointerPasser::FreePointerData(char *data) { +void TraceParser::FreeTraceData(char *data) { if (data == nullptr) { return; } @@ -217,7 +217,7 @@ void PointerPasser::FreePointerData(char *data) { data = nullptr; } -SampleRawField 
*PointerPasser::GetSampleRawField(char *data, const std::string &fieldName) { +SampleRawField *TraceParser::GetSampleRawField(char *data, const std::string &fieldName) { int ret = CheckFieldArgs(data, fieldName); if (ret != SUCCESS) { return nullptr; @@ -238,7 +238,7 @@ SampleRawField *PointerPasser::GetSampleRawField(char *data, const std::string & return fsrMap.at(field); } -void PointerPasser::FreeRawFieldMap() { +void TraceParser::FreeRawFieldMap() { for (auto it = fsrMap.begin(); it != fsrMap.end(); ++it) { if (!it->second) { continue; diff --git a/pmu/trace_pointer_parser.h b/pmu/trace_point_parser.h similarity index 89% rename from pmu/trace_pointer_parser.h rename to pmu/trace_point_parser.h index 701836f..a101d31 100644 --- a/pmu/trace_pointer_parser.h +++ b/pmu/trace_point_parser.h @@ -12,8 +12,8 @@ * Create: 2024-07-04 * Description: Provides the capability of parsing pointer events. ******************************************************************************/ -#ifndef LIBKPERF_TRACE_POINTER_PARSER_H -#define LIBKPERF_TRACE_POINTER_PARSER_H +#ifndef LIBKPERF_TRACE_POINT_PARSER_H +#define LIBKPERF_TRACE_POINT_PARSER_H #include #include @@ -46,7 +46,7 @@ namespace KUNPENG_PMU { } }; - class PointerPasser { + class TraceParser { public: /** * @brief determine whether the event is a pointer event. @@ -68,12 +68,12 @@ namespace KUNPENG_PMU { /** * @brief the method of parsing field. */ - static int ParsePointer(char *data, const string &fieldName, void *value, uint32_t vSize); + static int ParseTraceData(char *data, const string &fieldName, void *value, uint32_t vSize); /** * @brief free the data. */ - static void FreePointerData(char *data); + static void FreeTraceData(char *data); /** * @brief get the field named fieldName of this event. 
@@ -89,4 +89,4 @@ namespace KUNPENG_PMU { } -#endif //LIBKPERF_TRACE_POINTER_PARSER_H +#endif //LIBKPERF_TRACE_POINT_PARSER_H -- Gitee From b3a75a866d95646047d90036a25cad20a6e9da4e Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 26 May 2025 09:43:17 +0800 Subject: [PATCH 15/48] corrected spelling errors --- docs/Python_API.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Python_API.md b/docs/Python_API.md index ddf8d38..de323e9 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -329,7 +329,7 @@ for func_name in kperf.sys_call_func_list(): kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指标的能力 * class PmuDeviceAttr: - * metic: 指定需要采集的指标 + * metric: 指定需要采集的指标 * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes -- Gitee From 1901d106cd3651a744c3bfbfb2c3adfff16ade23 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 26 May 2025 11:00:13 +0800 Subject: [PATCH 16/48] collected spelling error --- docs/Go_API.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Go_API.md b/docs/Go_API.md index 26ae464..7199142 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -334,7 +334,7 @@ func main() { func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事件指标的能力 * type PmuDeviceAttr struct: - * Metic: 指定需要采集的指标 + * Metric: 指定需要采集的指标 * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes -- Gitee From ea62ba980b09d634b139b63353130537d55bf440 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Mon, 26 May 2025 16:39:35 +0800 Subject: [PATCH 17/48] =?UTF-8?q?unknow=20error=20=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E4=BF=A1=E6=81=AF=E8=AE=BE=E7=BD=AE=E4=BB=A5=E5=8F=8ApmuAppend?= =?UTF-8?q?Data=20=E6=9F=A5=E8=AF=A2=E5=88=A4=E6=96=AD=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/evt_list.cpp | 7 ++++++- pmu/pmu_list.cpp | 2 +- pmu/pmu_metric.cpp | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index 2af3e2f..bee7fa8 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -97,10 +97,10 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrIsMainPid()) { continue; } + if (err == LIBPERF_ERR_INVALID_EVENT) { if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", PMU Hardware or event type doesn't support branch stack sampling"); @@ -108,6 +108,11 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrGetEvtName() + ", " + std::string{strerror(errno)}); } } + + if (err == UNKNOWN_ERROR) { + pcerr::SetCustomErr(err, std::string{strerror(errno)}); + } + return err; } fdList.insert(perfEvt->GetFd()); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index c0470c0..31cc35e 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -413,7 +413,7 @@ namespace KUNPENG_PMU { } auto findToData = userDataList.find(*toData); - if (findFromData == userDataList.end()) { + if (findToData == userDataList.end()) { return LIBPERF_ERR_INVALID_PMU_DATA; } // For non-null target data list, append source list to end of target vector. diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 8fc5d12..d196c2f 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1565,7 +1565,7 @@ int64_t PmuGetCpuFreq(unsigned core) cpuPath << SYS_CPU_INFO_PATH << core << "/cpufreq/scaling_cur_freq"; if (!ExistPath(cpuPath.str())) { - New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq Or core exceed cpuNums. Not exist " + cpuPath.str()); + New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq or core exceed cpuNums. 
Not exist " + cpuPath.str()); return -1; } std::string curFreqStr = ReadFileContent(cpuPath.str()); -- Gitee From 58ddf7013f6bd1a787ff62a7a11d4da2be999cdd Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 23 May 2025 16:16:59 +0800 Subject: [PATCH 18/48] the logic of symbol parsing is optimized for NO_SYMBOL_RESOLVE --- include/pcerrc.h | 1 + include/pmu.h | 8 ++++++ pmu/pmu.cpp | 5 ++++ pmu/pmu_list.cpp | 39 +++++++++++++++++++++----- pmu/pmu_list.h | 1 + pmu/sampler.cpp | 26 ++++++++---------- pmu/spe.cpp | 4 +-- python/modules/kperf/perror.py | 1 + symbol/symbol_resolve.cpp | 50 +++++++++++++++------------------- 9 files changed, 84 insertions(+), 51 deletions(-) diff --git a/include/pcerrc.h b/include/pcerrc.h index 259befa..909ae4e 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -109,6 +109,7 @@ extern "C" { #define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 #define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 #define LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD 1067 +#define LIBPERF_ERR_PMU_DATA_NO_FOUND 1068 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index 5ec1726..9d5e81b 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -352,6 +352,14 @@ void PmuStop(int pd); */ int PmuRead(int pd, struct PmuData** pmuData); +/** +* @brief +* When symbol mode is SNO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +* @param pmuData the data from PmuRead +* @return 0 indicates resolve success, otherwise return error code +*/ +int ResolvePmuDataSymbol(struct PmuData* pmuData); + /** * @brief * Append data list to another data list <*toData>. 
diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 1fd80ea..b8a7a87 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -769,6 +769,11 @@ int PmuRead(int pd, struct PmuData** pmuData) } } +int ResolvePmuDataSymbol(struct PmuData* pmuData) +{ + return PmuList::GetInstance()->ResolvePmuDataSymbol(pmuData); +} + void PmuClose(int pd) { SetWarn(SUCCESS); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 31cc35e..2e6c4c3 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -418,7 +418,9 @@ namespace KUNPENG_PMU { } // For non-null target data list, append source list to end of target vector. auto& dataVec = findToData->second.data; + auto& ipsVec = findToData->second.sampleIps; dataVec.insert(dataVec.end(), findFromData->second.data.begin(), findFromData->second.data.end()); + ipsVec.insert(ipsVec.end(), findFromData->second.sampleIps.begin(), findFromData->second.sampleIps.end()); len = dataVec.size(); if (*toData != dataVec.data()) { @@ -625,9 +627,6 @@ namespace KUNPENG_PMU { void PmuList::FillStackInfo(EventData& eventData) { auto symMode = symModeList[eventData.pd]; - if (symMode == NO_SYMBOL_RESOLVE) { - return; - } // Parse dwarf and elf info of each pid and get stack trace for each pmu data. 
for (size_t i = 0; i < eventData.data.size(); ++i) { auto& pmuData = eventData.data[i]; @@ -636,15 +635,44 @@ namespace KUNPENG_PMU { SymResolverRecordModuleNoDwarf(pmuData.pid); } else if (symMode == RESOLVE_ELF_DWARF) { SymResolverRecordModule(pmuData.pid); + } else if (symMode == NO_SYMBOL_RESOLVE) { + SymResolverRecordModule(pmuData.pid); + continue; } else { continue; } + if (pmuData.stack == nullptr) { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } } + int PmuList::ResolvePmuDataSymbol(struct PmuData* iPmuData) + { + if (iPmuData == nullptr) { + New(LIBPERF_ERR_INVALID_PMU_DATA, "ipmuData is nullptr"); + return LIBPERF_ERR_INVALID_PMU_DATA; + } + auto userData = userDataList.find(iPmuData); + if (userData == userDataList.end()) { + New(LIBPERF_ERR_PMU_DATA_NO_FOUND, "ipmuData isn't in userDataList"); + return LIBPERF_ERR_PMU_DATA_NO_FOUND; + } + + auto& eventData = userDataList[iPmuData]; + auto symMode = symModeList[eventData.pd]; + for (size_t i = 0; i < eventData.data.size(); ++i) { + auto& pmuData = eventData.data[i]; + auto& ipsData = eventData.sampleIps[i]; + if (pmuData.stack == nullptr) { + pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); + } + } + New(SUCCESS); + return SUCCESS; + } + void PmuList::AggregateData(const std::vector& evData, std::vector& newEvData) { // Acccumulate stat data in previous PmuCollect for convenient use. 
@@ -1025,9 +1053,6 @@ namespace KUNPENG_PMU { int PmuList::InitSymbolRecordModule(const unsigned pd, PmuTaskAttr* taskParam) { SymbolMode symMode = GetSymbolMode(pd); - if (symMode == NO_SYMBOL_RESOLVE) { - return SUCCESS; - } if (taskParam->pmuEvt->collectType == COUNTING) { return SUCCESS; @@ -1053,7 +1078,7 @@ namespace KUNPENG_PMU { } } - if (this->symModeList[pd] == RESOLVE_ELF_DWARF) { + if (this->symModeList[pd] == RESOLVE_ELF_DWARF || this->symModeList[pd] == NO_SYMBOL_RESOLVE) { for (const auto& pid: pidList) { int rt = SymResolverRecordModule(pid); if (rt != SUCCESS) { diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index b44ff55..523e2ad 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -76,6 +76,7 @@ public: void StoreSplitData(unsigned pd, std::pair& previousEventList, std::unordered_map& eventSplitMap); bool IsAllPidExit(const unsigned pd); + int ResolvePmuDataSymbol(struct PmuData* iPmuData); private: using ProcPtr = std::shared_ptr; diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index a12e709..cab1556 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -217,22 +217,20 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; - if (symMode != NO_SYMBOL_RESOLVE) { - // Copy ips from ring buffer and get stack info later. - if (evt->callStack == 0) { - int i = 0; - while (i < sample->nr && !IsValidIp(sample->ips[i])) { - i++; - } - if (i < sample->nr) { + // Copy ips from ring buffer and get stack info later. 
+ if (evt->callStack == 0) { + int i = 0; + while (i < sample->nr && !IsValidIp(sample->ips[i])) { + i++; + } + if (i < sample->nr) { + ips->ips.push_back(sample->ips[i]); + } + } else { + for (int i = sample->nr - 1; i >= 0; --i) { + if (IsValidIp(sample->ips[i])) { ips->ips.push_back(sample->ips[i]); } - } else { - for (int i = sample->nr - 1; i >= 0; --i) { - if (IsValidIp(sample->ips[i])) { - ips->ips.push_back(sample->ips[i]); - } - } } } current->cpu = sample->cpu; diff --git a/pmu/spe.cpp b/pmu/spe.cpp index 0d1c7d9..2bc42c4 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -359,9 +359,9 @@ void Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData uint64_t off = dataTail % mpage->data_size; struct perf_event_header *header = (struct perf_event_header *)(ringBuf + off); - if (header->type == PERF_RECORD_MMAP && symbolMode != NO_SYMBOL_RESOLVE) { + if (header->type == PERF_RECORD_MMAP) { struct PerfRecordMmap *sample = (struct PerfRecordMmap *)header; - if (symbolMode == RESOLVE_ELF_DWARF) { + if (symbolMode == RESOLVE_ELF_DWARF || symbolMode == NO_SYMBOL_RESOLVE) { int ret = SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); if (ret != SUCCESS) { // if the module fails to be updated, a warning is recorded to overwrite the failure error code. 
diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 4ba96b2..20776de 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -108,6 +108,7 @@ class Error: LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD = 1067 + LIBPERF_ERR_PMU_DATA_NO_FOUND = 1068 UNKNOWN_ERROR = 9999 diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 46f6ad5..2f971f4 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "name_resolve.h" #include "pcerr.h" #include "symbol_resolve.h" @@ -70,16 +71,15 @@ namespace { flag = false; } - static inline bool CheckIfFile(std::string mapline) + static inline bool CheckIfFile(const std::string& mapline) { - return (!((mapline.find(HUGEPAGE) != std::string::npos) || (mapline.find(DEV_ZERO) != std::string::npos) || - (mapline.find(ANON) != std::string::npos) || (mapline.find(STACK) != std::string::npos) || - (mapline.find(SOCKET) != std::string::npos) || (mapline.find(VSYSCALL) != std::string::npos) || - (mapline.find(HEAP) != std::string::npos) || (mapline.find(VDSO) != std::string::npos) || - (mapline.find(SYSV) != std::string::npos) || (mapline.find(VVAR) != std::string::npos)) && - (mapline.find(R_XP) != std::string::npos)) - ? 
true - : false; + const std::vector patterns = {HUGEPAGE, DEV_ZERO, ANON, STACK, SOCKET, VSYSCALL, HEAP ,VDSO, SYSV, VVAR}; + for (const auto& pattern :patterns) { + if (mapline.find(pattern) != std::string::npos) { + return false; + } + } + return mapline.find(R_XP) != std::string::npos; } static inline char* InitChar(int len) @@ -398,7 +398,7 @@ bool MyElf::IsExecFile() void MyElf::Emplace(unsigned long addr, const ELF_SYM& elfSym) { - this->symTab.insert({addr, elfSym}); + this->symTab.emplace(addr, elfSym); } ELF_SYM* MyElf::FindSymbol(unsigned long addr) @@ -548,15 +548,11 @@ int SymbolResolve::RecordModule(int pid, RecordModuleType recordModuleType) moduleSafeHandler.releaseLock(pid); return 0; } - char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -588,15 +584,11 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) return SUCCESS; } // Get memory maps of pid. 
- char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -618,8 +610,8 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) this->RecordDwarf(item->moduleName.c_str()); } } - for (auto mod : diffModVec) { - oldModVec.push_back(mod); + for (auto& mod : diffModVec) { + oldModVec.emplace_back(mod); } pcerr::New(SUCCESS); moduleSafeHandler.releaseLock(pid); @@ -1176,11 +1168,13 @@ std::vector> SymbolResolve::FindDiffMaps( const std::vector>& newMaps) const { std::vector> diffMaps; + std::set oldStarts; + for (const auto& oldMod : oldMaps) { + oldStarts.insert(oldMod->start); + } for (auto newMod : newMaps) { - for (auto oldMod : oldMaps) { - if (newMod->start != oldMod->start) { - diffMaps.push_back(newMod); - } + if (oldStarts.find(newMod->start) == oldStarts.end()) { + diffMaps.emplace_back(newMod); } } -- Gitee From 2ffa31823b8078ebd19e28747fd8259ff5dba188 Mon Sep 17 00:00:00 2001 From: glx Date: Wed, 28 May 2025 11:18:29 +0800 Subject: [PATCH 19/48] Fix compile error --- util/common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/util/common.h b/util/common.h index 1c3bcb7..77b8a9b 100644 --- a/util/common.h +++ b/util/common.h @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef __x86_64__ #define IS_X86 1 -- Gitee From 8edaec1c1777ee1d1f907cb4c6652c025d7c29a5 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Thu, 29 May 2025 10:35:58 +0800 Subject: [PATCH 20/48] 
=?UTF-8?q?libkperf=20=E6=94=AF=E6=8C=81=E7=BC=96?= =?UTF-8?q?=E8=AF=91=E9=9D=99=E6=80=81=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/pmu.h | 2 +- pmu/CMakeLists.txt | 3 +++ symbol/CMakeLists.txt | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/pmu.h b/include/pmu.h index 9d5e81b..1063cdb 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -354,7 +354,7 @@ int PmuRead(int pd, struct PmuData** pmuData); /** * @brief -* When symbol mode is SNO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +* When symbol mode is NO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function * @param pmuData the data from PmuRead * @return 0 indicates resolve success, otherwise return error code */ diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt index c68bfe0..4af6e76 100644 --- a/pmu/CMakeLists.txt +++ b/pmu/CMakeLists.txt @@ -31,8 +31,11 @@ include_directories(${SYMBOL_FILE_DIR}) include_directories(${PMU_DECODER_DIR}) ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf") target_link_libraries(kperf numa sym) target_compile_options(kperf PRIVATE -fPIC) install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) file(GLOB HEADER_FILES ${PROJECT_TOP_DIR}/include/*.h) install(FILES ${HEADER_FILES} DESTINATION ${CMAKE_INSTALL_PREFIX}/include) diff --git a/symbol/CMakeLists.txt b/symbol/CMakeLists.txt index 920d59e..aaa8988 100644 --- a/symbol/CMakeLists.txt +++ b/symbol/CMakeLists.txt @@ -16,6 +16,9 @@ include_directories(${INCLUDE_DIR}) message(${THIRD_PARTY}/elfin-parser/elf) ADD_LIBRARY(sym SHARED ${SYMBOL_SRC}) +ADD_LIBRARY(sym_static STATIC ${SYMBOL_SRC}) 
+set_target_properties(sym_static PROPERTIES OUTPUT_NAME "sym") target_link_libraries(sym elf_static dwarf_static pthread) install(TARGETS sym DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS sym_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) install(FILES ${SYMBOL_FILE_DIR}/symbol.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include) -- Gitee From 7e6e99a225e7c869fe9724b1447059223a656e26 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 29 May 2025 15:23:29 +0800 Subject: [PATCH 21/48] =?UTF-8?q?=E9=80=82=E9=85=8D=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E4=BF=A1=E6=81=AF,=E5=BD=93=E6=97=A0=E6=9D=83=E9=99=90?= =?UTF-8?q?=E6=97=B6=E6=8A=9B=E5=87=BA=E5=AF=B9=E5=BA=94=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu.cpp | 33 +++++++++++++++++++++++++++++++++ pmu/pmu_event_list.cpp | 27 +++++---------------------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index b8a7a87..cba1713 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -830,6 +830,24 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } +static bool PerfEventSupported(__u64 type, __u64 config) +{ + perf_event_attr attr{}; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = type; + attr.config = config; + attr.disabled = 1; + attr.inherit = 1; + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); + if (fd < 0) { + return false; + } + close(fd); + return true; +} + static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); @@ -851,6 +869,9 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } else { pmuEvt = 
GetPmuEvent(evtName, collectType); if (pmuEvt == nullptr) { + if (Perrorno() != SUCCESS) { + return nullptr; + } #ifdef IS_X86 New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName) + ";x86 just supports core event and raw event"); #else @@ -858,6 +879,18 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att #endif return nullptr; } + + if (!PerfEventSupported(pmuEvt->type, pmuEvt->config)) { + int err = MapErrno(errno); + if (err == LIBPERF_ERR_NO_PERMISSION) { + New(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event.Swtich to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); + } else if(err == UNKNOWN_ERROR) { + New(UNKNOWN_ERROR, std::string{strerror(errno)}); + } else { + New(err); + } + return nullptr; + } } /** * Assign cpus to collect diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index ab83ffd..547ccb2 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -101,24 +101,6 @@ static void GetTraceSubFolder(const std::string& traceFolder, const string& devN closedir(dir); } -static bool PerfEventSupported(__u64 type, __u64 config) -{ - perf_event_attr attr{}; - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(struct perf_event_attr); - attr.type = type; - attr.config = config; - attr.disabled = 1; - attr.inherit = 1; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); - if (fd < 0) { - return false; - } - close(fd); - return true; -} - const char** QueryCoreEvent(unsigned *numEvt) { if (!coreEventList.empty()) { @@ -128,9 +110,6 @@ const char** QueryCoreEvent(unsigned *numEvt) auto coreEventMap = KUNPENG_PMU::CORE_EVENT_MAP.at(GetCpuType()); for (auto& pair : coreEventMap) { auto eventName = pair.first; - if (!PerfEventSupported(pair.second.type, pair.second.config)) { - continue; - } char* eventNameCopy = new 
char[eventName.length() + 1]; strcpy(eventNameCopy, eventName.c_str()); coreEventList.emplace_back(eventNameCopy); @@ -203,6 +182,11 @@ const char** QueryTraceEvent(unsigned *numEvt) struct dirent *entry; const string &traceFolder = GetTraceEventDir(); if (traceFolder.empty()) { + if (errno == EACCES) { + New(LIBPERF_ERR_NO_PERMISSION, "no permission to access '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } else { + New(LIBPERF_ERR_INVALID_EVENT, "can't find '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } return traceEventList.data(); } DIR *dir = opendir(traceFolder.c_str()); @@ -282,7 +266,6 @@ const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt) New(LIBPERF_ERR_QUERY_EVENT_LIST_FAILED, "Query event failed."); return nullptr; } - New(SUCCESS); return eventList; } -- Gitee From 34ea6897caf18c110cfc7bed740962bf40d4c6ff Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 30 May 2025 10:47:23 +0800 Subject: [PATCH 22/48] =?UTF-8?q?go=E9=80=82=E9=85=8DResolvePmuDataSymbol?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/src/libkperf/kperf/kperf.go | 25 +++++++++++++++++++ go/src/libkperf_test/libkperf_test.go | 36 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 99fb52d..b21f304 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -728,6 +728,31 @@ func PmuDumpData(dataVo PmuDataVo, filePath string, dumpDwf bool) error { } return nil } + +// When symbol mode is SNO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +// param PmuDataVo the data from PmuRead +// return nil indicates resolve success, otherwise return error code +func ResolvePmuDataSymbol(dataVo PmuDataVo) error { + err := C.ResolvePmuDataSymbol(dataVo.cData) + if int(err) != 0 { + 
return errors.New(C.GoString(C.Perror())) + } + dataLen := len(dataVo.GoData) + ptr := unsafe.Pointer(dataVo.cData) + slice := reflect.SliceHeader { + Data: uintptr(ptr), + Len: dataLen, + Cap: dataLen, + } + cPmuDatas := *(*[]C.struct_PmuData)(unsafe.Pointer(&slice)) + for i := 0; i < dataLen; i++ { + dataObj := cPmuDatas[i] + if dataObj.stack != nil { + dataVo.GoData[i].appendSymbols(dataObj) + } + } + return nil +} // Initialize the trace collection target // On success, a trace collect task id is returned which is the unique identity for the task diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index 28848fe..d06683a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -287,3 +287,39 @@ func TestPmuGetCpuFreqDetail(t *testing.T) { kperf.PmuCloseCpuFreqSampling() } + +func TestResolvePmuDataSymbol(t *testing.T) { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + t.Fatalf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + t.Fatalf("kperf pmuread failed, expect err is nil, but is %v", err) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + t.Fatalf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + t.Fatalf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + t.Fatalf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} \ No newline at end of file -- Gitee From e6c55dec1f3483bf934c51457f4b9ba3e9639233 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 30 May 
2025 10:16:28 +0800 Subject: [PATCH 23/48] =?UTF-8?q?=E6=8F=90=E4=BE=9BResolvePmuDataSymbol?= =?UTF-8?q?=E7=9A=84python=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/modules/_libkperf/Pmu.py | 10 ++++++++++ python/modules/kperf/pmu.py | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index ef6147c..1fa750c 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1619,6 +1619,15 @@ def PmuRead(pd: int) -> PmuData: c_data_len = c_PmuRead(c_pd, ctypes.byref(c_data_pointer)) return PmuData(c_data_pointer, c_data_len) +def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: + """ + int ResolvePmuDataSymbol(struct PmuData* pmuData); + """ + c_ResolvePmuDataSymbol = kperf_so.ResolvePmuDataSymbol + c_ResolvePmuDataSymbol.argtypes = [ctypes.POINTER(CtypesPmuData)] + c_ResolvePmuDataSymbol.restype = ctypes.c_int + + return c_ResolvePmuDataSymbol(pmuData) def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), toData: ctypes.POINTER(ctypes.POINTER(CtypesPmuData))) -> int: @@ -2086,4 +2095,5 @@ __all__ = [ 'PmuReadCpuFreqDetail', 'PmuCloseCpuFreqSampling', 'PmuCpuFreqDetail', + 'ResolvePmuDataSymbol' ] diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 672a060..3d33b2e 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -390,6 +390,14 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) +def resolvePmuDataSymbol(pmuData: PmuData) -> PmuData: + """ + when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols + :param: pmuData + :return: pmu data + """ + return _libkperf.ResolvePmuDataSymbol(pmuData.pointer()) + def stop(pd: int) -> None: """ @@ -595,4 +603,5 @@ __all__ = [ 'open_cpu_freq_sampling', 'close_cpu_freq_sampling', 'read_cpu_freq_detail', + 
'resolvePmuDataSymbol' ] -- Gitee From c608b2c4b6a08f7084d5f1c187cedd11d0cb284e Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 30 May 2025 17:34:05 +0800 Subject: [PATCH 24/48] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E5=80=BC=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/modules/kperf/pmu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 3d33b2e..2f0a2b5 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -390,7 +390,7 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) -def resolvePmuDataSymbol(pmuData: PmuData) -> PmuData: +def resolvePmuDataSymbol(pmuData: PmuData) -> int: """ when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols :param: pmuData -- Gitee From a30dd56745f9cdf7cb3e78abebea09ec96e53fe6 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 30 May 2025 17:18:32 +0800 Subject: [PATCH 25/48] =?UTF-8?q?=E4=B8=8D=E7=9B=B4=E6=8E=A5=E6=9A=B4?= =?UTF-8?q?=E9=9C=B2symbol=E6=8E=A5=E5=8F=A3=E4=BA=A7=E7=94=9F=E7=9A=84?= =?UTF-8?q?=E5=BC=82=E5=B8=B8,=20=E8=80=8C=E4=BD=9C=E4=B8=BAwarning?= =?UTF-8?q?=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu_list.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 2e6c4c3..d0a47c4 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -646,6 +646,12 @@ namespace KUNPENG_PMU { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } + //Exceptions generated by the symbol interface are not directly exposed and are processed as warnings. 
+ int err = Perrorno(); + if (err < LIBPERF_ERR_NO_AVAIL_PD && err >= LIBSYM_ERR_BASE) { + pcerr::SetWarn(err, Perror()); + New(SUCCESS); + } } int PmuList::ResolvePmuDataSymbol(struct PmuData* iPmuData) -- Gitee From 08fb45c8af581685cab98c428654c800d149befa Mon Sep 17 00:00:00 2001 From: Galaxy Date: Wed, 4 Jun 2025 08:43:45 +0000 Subject: [PATCH 26/48] =?UTF-8?q?=E9=80=82=E9=85=8Done=20numa=20per=20sock?= =?UTF-8?q?et=E7=9A=84ddrc=E9=87=87=E9=9B=86=E5=8A=9F=E8=83=BD=20=E9=B2=B2?= =?UTF-8?q?=E9=B9=8F=E6=9E=B6=E6=9E=84=E4=B8=8B=EF=BC=8C=E5=AF=B9=E4=BA=8E?= =?UTF-8?q?one=20numa=20per=20socket=E7=9A=84=E5=9C=BA=E6=99=AF=EF=BC=8Cdd?= =?UTF-8?q?rc=20pmu=E8=AE=BE=E5=A4=87=E5=92=8Csocket=E7=9A=84=E6=98=A0?= =?UTF-8?q?=E5=B0=84=E5=85=B3=E7=B3=BB=E4=B8=8D=E5=8F=98=EF=BC=8C=E4=BD=86?= =?UTF-8?q?=E6=98=AF=E5=92=8Cnuma=E7=9A=84=E6=98=A0=E5=B0=84=E5=85=B3?= =?UTF-8?q?=E7=B3=BB=E5=8F=91=E7=94=9F=E4=BA=86=E5=8F=98=E5=8C=96=EF=BC=9A?= =?UTF-8?q?=20hisi=5Fsccl3=5FddrcX=20->=20socket=200=20numa=200=20hisi=5Fs?= =?UTF-8?q?ccl1=5FddrcX=20->=20socket=200=20numa=200=20hisi=5Fsccl11=5Fddr?= =?UTF-8?q?cX=20->=20socket=201=20numa=201=20hisi=5Fsccl9=5FddrcX=20->=20s?= =?UTF-8?q?ocket=201=20numa=201=20=E6=89=80=E4=BB=A5=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E9=80=9A=E8=BF=87ddrcNumaId=E6=9D=A5=E5=88=A4=E6=96=ADpmu?= =?UTF-8?q?=E8=AE=BE=E5=A4=87=E5=92=8Cchannel=20id=E7=9A=84=E6=98=A0?= =?UTF-8?q?=E5=B0=84=E5=85=B3=E7=B3=BB=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 目前借助sccl id和ddrc id共同来决定channel id的映射: sccl 3 ddrc 0 -> socket 0 channel 0 sccl 3 ddrc 2 -> socket 0 channel 1 sccl 3 ddrc 3 -> socket 0 channel 2 sccl 3 ddrc 5 -> socket 0 channel 3 sccl 1 ddrc 0 -> socket 0 channel 4 sccl 1 ddrc 2 -> socket 0 channel 5 sccl 1 ddrc 3 -> socket 0 channel 6 sccl 1 ddrc 5 -> socket 0 channel 7 ... 
Signed-off-by: Galaxy --- pmu/pmu_metric.cpp | 60 ++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index d196c2f..66c70c4 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -38,6 +38,7 @@ using namespace std; using namespace pcerr; +using IdxMap = unordered_map>; static unsigned maxCpuNum = 0; static vector coreArray; @@ -1071,10 +1072,33 @@ namespace KUNPENG_PMU { return SUCCESS; } - static unordered_map> DDRC_CHANNEL_MAP = { - {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, - {CHIP_TYPE::HIPB, {0, 2, 3, 5}} + static IdxMap DDRC_CHANNEL_MAP_HIPA = { + {1, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {3, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, + {5, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {7, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, }; + static IdxMap DDRC_CHANNEL_MAP_HIPB = { + {3, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {1, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + {11, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {9, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + }; + + static unordered_map DDRC_CHANNEL_MAP = { + {HIPA, DDRC_CHANNEL_MAP_HIPA}, + {HIPB, DDRC_CHANNEL_MAP_HIPB}, + }; + + static int ParseDDRIdx(const string &devName, const string prefix) + { + size_t ddrcPos = devName.find(prefix); + size_t channelIndex = ddrcPos + prefix.length(); + string ddrcIndexStr = devName.substr(channelIndex); + size_t separatorPos = ddrcIndexStr.find("_"); + int ddrcIndex = separatorPos != string::npos ? stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + return ddrcIndex; + } static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) { @@ -1084,28 +1108,22 @@ namespace KUNPENG_PMU { return false; } // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 - string ddrcStr = "ddrc"; - size_t ddrcPos = devName.find(ddrcStr); - size_t channelIndex = ddrcPos + ddrcStr.length(); - string ddrcIndexStr = devName.substr(channelIndex); - // find index in DDRC_CHANNEL_MAP. 
eg: 3_1 --> 3, corresponds to channel 2 in HIPB - size_t separatorPos = ddrcIndexStr.find("_"); - int ddrcIndex = separatorPos != string::npos ? stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + int ddrcIndex = ParseDDRIdx(devName, "ddrc"); + int scclIndex = ParseDDRIdx(devName, "sccl"); - unsigned channelAddNum = 0; - if((ddrNumaId & 1) == 1) { // channel id + 4 in sequence - channelAddNum = 4; - } CHIP_TYPE chipType = GetCpuType(); //get channel index if (DDRC_CHANNEL_MAP.find(chipType) == DDRC_CHANNEL_MAP.end()) { return false; } - auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; - auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); - if (it != ddrcChannelList.end()) { - size_t index = distance(ddrcChannelList.begin(), it); - channelId = index + channelAddNum; - return true; + + auto &ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; + auto ddrIdxMap = ddrcChannelList.find(scclIndex); + if (ddrIdxMap != ddrcChannelList.end()) { + auto channelIdx = ddrIdxMap->second.find(ddrcIndex); + if (channelIdx != ddrIdxMap->second.end()) { + channelId = channelIdx->second; + return true; + } } return false; } @@ -1136,7 +1154,7 @@ namespace KUNPENG_PMU { outData.mode = GetMetricMode(data.metric); outData.channelId = channelId; outData.ddrNumaId = data.ddrNumaId; - outData.socketId = data.ddrNumaId < 2 ? 
0 : 1; // numa id 0-1 --> socket id 0; numa id 2-3 --> socket id 1 + outData.socketId = data.socketId; devDataByChannel[ddrDatakey] = outData; } else { findData->second.count += data.count; -- Gitee From 92867101fa639136ed891aaa9849843b8bf62dd6 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Tue, 3 Jun 2025 18:12:02 +0800 Subject: [PATCH 27/48] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=96=B0=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3=E7=9A=84=E6=96=87=E6=A1=A3=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Go_API.md | 79 +++++++++++++++++++++++++++++++++++++++++++++- docs/Python_API.md | 51 ++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/docs/Go_API.md b/docs/Go_API.md index 7199142..fd0a346 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -438,7 +438,8 @@ func main() { ``` -### kperf.PmuGetCpuFreq +### kperf.PmuGetCpuFreq + func PmuGetCpuFreq(core uint) (int64, error) 查询当前系统指定core的实时CPU频率 * core cpu coreId @@ -457,4 +458,80 @@ func main() { } fmt.Printf("coreId %v freq is %v\n", coreId, freq) } +``` + +### kperf.PmuOpenCpuFreqSampling + +func PmuOpenCpuFreqSampling(period uint) (error) 开启cpu频率采集 + +### kperf.PmuCloseCpuFreqSampling + +func PmuCloseCpuFreqSampling() 关闭cpu频率采集 + +### kperf.PmuReadCpuFreqDetail + +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + err := kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for _, v := range freqList { + fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} +``` + +### kperf.ResolvePmuDataSymbol + +func ResolvePmuDataSymbol(dataVo PmuDataVo) error 
当SymbolMode不设置或者设置为0时,可通过该接口解析PmuRead返回的PmuData数据中的符号 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + return + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v", err) + return + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + fmt.Printf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + fmt.Printf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} ``` \ No newline at end of file diff --git a/docs/Python_API.md b/docs/Python_API.md index de323e9..a0a1968 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -428,4 +428,55 @@ kperf.get_numa_core(numaId: int): 查询指定numaId下对应的core列表 # python代码示例 numaId = 1 numa_cores = kperf.get_numa_core(numaId) +``` + +### kperf.open_cpu_freq_sampling + +def open_cpu_freq_sampling(period: int) 开启cpu频率采集 + +### kperf.close_cpu_freq_sampling + +def close_cpu_freq_sampling() 关闭cpu频率采集 + +### kperf.read_cpu_freq_detail + +def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```python +#python代码示例 +err = kperf.open_cpu_freq_sampling(100) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +dataList = kperf.read_cpu_freq_detail() +for item in dataList.iter: + print(f"cpuId={item.cpuId} minFreq={item.minFreq} maxFreq={item.maxFreq} avgFreq={item.avgFreq}") 
+ +kperf.close_cpu_freq_sampling() +``` + +### kperf.resolvePmuDataSymbol + +def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 +```python +#python代码示例 +event_name = "cycles" +pmu_attr = kperf.PmuAttr( + evtList=[event_name], + sampleRate=1000, + callStack=True, + useFreq=True, +) +fd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if fd == -1: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.enable(fd) +time.sleep(1) +kperf.disable(fd) +pmu_data = kperf.read(fd) +err = kperf.resolvePmuDataSymbol(pmu_data) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.close(fd) ``` \ No newline at end of file -- Gitee From 4757662cfb83564432f2a786371d1b7338481f28 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 5 Jun 2025 09:41:26 +0800 Subject: [PATCH 28/48] =?UTF-8?q?includeNewFork=3D1=E6=97=B6,=E6=AE=B5?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E5=BC=82=E5=B8=B8=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/dummy_event.cpp | 6 +++-- pmu/dummy_event.h | 2 +- pmu/evt_list.cpp | 56 ++++++++++++++++++++++++++++++++------------- pmu/pmu.cpp | 31 +------------------------ pmu/pmu_list.cpp | 2 +- 5 files changed, 47 insertions(+), 50 deletions(-) diff --git a/pmu/dummy_event.cpp b/pmu/dummy_event.cpp index 723a753..9137db8 100644 --- a/pmu/dummy_event.cpp +++ b/pmu/dummy_event.cpp @@ -76,6 +76,7 @@ namespace KUNPENG_PMU { if (forkPidQueue.empty()) { continue; } + std::lock_guard lg(dummyMutex); auto& pid = forkPidQueue.front(); for (const auto& evtList: evtLists) { auto groupId = evtList->GetGroupId(); @@ -83,7 +84,6 @@ namespace KUNPENG_PMU { DummyContext ctx = {evtList, static_cast(pid), evtGroupInfo.first, evtGroupInfo.second}; forkStrategy.DoHandler(ctx, evtGroupInfo.first, evtGroupInfo.second); } - std::lock_guard lg(dummyMutex); 
forkPidQueue.pop(); } }); @@ -146,7 +146,9 @@ namespace KUNPENG_PMU { if (header->type == PERF_RECORD_FORK) { auto sample = (KUNPENG_PMU::PerfRecordFork*) header; std::lock_guard lg(dummyMutex); - forkPidQueue.push(sample->tid); + if((uint8_t*)page + MAP_LEN > ringBuf + off + sizeof(KUNPENG_PMU::PerfRecordFork)) { + forkPidQueue.push(sample->tid); + } } if (header->type == PERF_RECORD_EXIT) { auto sample = (KUNPENG_PMU::PerfRecordFork*) header; diff --git a/pmu/dummy_event.h b/pmu/dummy_event.h index 0468677..dc25970 100644 --- a/pmu/dummy_event.h +++ b/pmu/dummy_event.h @@ -65,7 +65,7 @@ namespace KUNPENG_PMU { std::thread dummyThread; std::thread consumeThread; - std::atomic dummyFlag; + volatile std::atomic dummyFlag; std::vector>& evtLists; std::vector ppids; diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index bee7fa8..73ca53a 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -109,6 +109,11 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptr /proc/sys/kernel/perf_event_paranoid'"); + } + if (err == UNKNOWN_ERROR) { pcerr::SetCustomErr(err, std::string{strerror(errno)}); } @@ -176,6 +181,9 @@ void KUNPENG_PMU::EvtList::FillFields( int KUNPENG_PMU::EvtList::Read(vector& data, std::vector& sampleIps, std::vector& extPool, std::vector& switchData) { + + std::unique_lock lg(mutex); + for (unsigned int row = 0; row < numCpu; row++) { for (unsigned int col = 0; col < numPid; col++) { int err = this->xyCounterArray[row][col]->BeginRead(); @@ -245,13 +253,15 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons return; } std::unique_lock lock(mutex); + this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); + bool hasInitErr = false; + std::map perfEvtMap; for (unsigned int row = 0; row < numCpu; row++) { - this->pidList.emplace_back(shared_ptr(topology, FreeProcTopo)); - procMap[pid] = this->pidList.back(); PerfEvtPtr perfEvt = this->MapPmuAttr(this->cpuList[row]->coreId, 
this->pidList.back()->tid, this->pmuEvt.get()); if (perfEvt == nullptr) { - return; + hasInitErr = true; + break; } perfEvt->SetSymbolMode(symMode); perfEvt->SetBranchSampleFilter(branchSampleFilter); @@ -263,22 +273,36 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons err = perfEvt->Init(groupEnable, -1, -1); } if (err != SUCCESS) { - return; + hasInitErr = true; + break; } - fdList.insert(perfEvt->GetFd()); + perfEvtMap.emplace(row, perfEvt); + } + + if (!hasInitErr) { + procMap[pid] = this->pidList.back(); numPid++; - this->xyCounterArray[row].emplace_back(perfEvt); - /** - * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, - * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt - * may be disable. At this time No need to collect counts. - */ - if (evtStat == ENABLE || evtStat == START) { - perfEvt->Enable(); + for (unsigned int row = 0; row < numCpu; row++) { + auto perfEvt = perfEvtMap[row]; + fdList.insert(perfEvt->GetFd()); + this->xyCounterArray[row].emplace_back(perfEvt); + /** + * If the current status is enable, start, read, other existing perfEvt may have been enabled and is counting, + * so the new perfEvt must also be added to enable. If the current status is read, the status of all perfEvt + * may be disable. At this time No need to collect counts. 
+ */ + if (evtStat == ENABLE || evtStat == START) { + perfEvt->Enable(); + } + if (evtStat == READ && prevStat != DISABLE) { + perfEvt->Enable(); + } } - if (evtStat == READ && prevStat != DISABLE) { - perfEvt->Enable(); + } else { + for (const auto& evtPtr : perfEvtMap) { + close(evtPtr.second->GetFd()); } + this->pidList.erase(this->pidList.end() - 1); } } @@ -302,7 +326,7 @@ void KUNPENG_PMU::EvtList::ClearExitFd() int pid = it->get()->GetPid(); if (exitPidVet.find(pid) != exitPidVet.end()) { int fd = it->get()->GetFd(); - this->fdList.erase(fd); + this->fdList.erase(this->fdList.find(fd)); close(fd); it = perfVet.erase(it); continue; diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index cba1713..4ffd5d7 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -427,6 +427,7 @@ static void PmuTaskAttrFree(PmuTaskAttr *taskAttr) int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr) { SetWarn(SUCCESS); + New(SUCCESS); PmuAttr copiedAttr = *attr; pair previousEventList = {0, nullptr}; try { @@ -830,24 +831,6 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } -static bool PerfEventSupported(__u64 type, __u64 config) -{ - perf_event_attr attr{}; - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(struct perf_event_attr); - attr.type = type; - attr.config = config; - attr.disabled = 1; - attr.inherit = 1; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); - if (fd < 0) { - return false; - } - close(fd); - return true; -} - static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); @@ -879,18 +862,6 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att #endif return nullptr; } - - if (!PerfEventSupported(pmuEvt->type, pmuEvt->config)) { - int err = MapErrno(errno); - if (err == 
LIBPERF_ERR_NO_PERMISSION) { - New(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event.Swtich to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); - } else if(err == UNKNOWN_ERROR) { - New(UNKNOWN_ERROR, std::string{strerror(errno)}); - } else { - New(err); - } - return nullptr; - } } /** * Assign cpus to collect diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index d0a47c4..d82e33e 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -444,6 +444,7 @@ namespace KUNPENG_PMU { void PmuList::Close(const int pd) { + EraseDummyEvent(pd); auto evtList = GetEvtList(pd); for (auto item: evtList) { item->Close(); @@ -455,7 +456,6 @@ namespace KUNPENG_PMU { EraseDataEvtGroupList(pd); RemoveEpollFd(pd); EraseSpeCpu(pd); - EraseDummyEvent(pd); EraseParentEventMap(pd); SymResolverDestroy(); PmuEventListFree(); -- Gitee From 8cfbd857846795a1f57889e9bc9011f8a6af4f23 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 11 Jun 2025 14:50:33 +0800 Subject: [PATCH 29/48] =?UTF-8?q?=E5=AE=8C=E5=96=84=E7=A4=BA=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 158 +++++++++++++++----------- README.md | 150 ++++++++++++++----------- docs/Details_Usage.md | 252 +++++++++++++++++++++++++++++++++++++++--- docs/Go_API.md | 16 +-- docs/Python_API.md | 16 ++- 5 files changed, 434 insertions(+), 158 deletions(-) diff --git a/README.en.md b/README.en.md index dbc30fd..7a344b5 100644 --- a/README.en.md +++ b/README.en.md @@ -126,82 +126,106 @@ Here are some examples: * Get pmu count for a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. 
-// is an identity for current task. -int pd = PmuOpen(COUNTING, &attr); -// Start collection. -PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - ... +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen and pmu descriptor is return. + // is an identity for current task. + int pd = PmuOpen(COUNTING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << " count=" << d->count << std::endl; + } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. + PmuClose(pd); } -// To free PmuData, call PmuDataFree. -PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Sample a process ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// Initialize event list and pid list in PmuAttr. -// There is one event in list, named 'cycles'. -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// Call PmuOpen and pmu descriptor is return. -// is an identity for current task. -// Use SAMPLING for sample task. -int pd = PmuOpen(SAMPLING, &attr); -// Start collection.
-PmuEnable(pd); -// Collect for one second. -sleep(1); -// Stop collection. -PmuDisable(pd); -PmuData *data = NULL; -// Read pmu data. You can also read data before PmuDisable. -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // Get an element from array. - PmuData *d = &data[i]; - // Get stack object which is a linked list. - Stack *stack = d->stack; - while (stack) { - // Get symbol object. - if (stack->symbol) { - ... +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // Initialize event list and pid list in PmuAttr. + // There is one event in list, named 'cycles'. + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // Call PmuOpen and pmu descriptor is return. + // is an identity for current task. + // Use SAMPLING for sample task. + int pd = PmuOpen(SAMPLING, &attr); + // Start collection. + PmuEnable(pd); + // Collect for one second. + sleep(1); + // Stop collection. + PmuDisable(pd); + PmuData *data = NULL; + // Read pmu data. You can also read data before PmuDisable. + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // Get an element from array. + PmuData *d = &data[i]; + // Get stack object which is a linked list. + Stack *stack = d->stack; + while (stack) { + // Get symbol object. + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + } + stack = stack->next; } - stack = stack->next; } + // To free PmuData, call PmuDataFree. + PmuDataFree(data); + // Like fd, call PmuClose if pd will not be used. + PmuClose(pd); } -// To free PmuData, call PmuDataFree. 
-PmuDataFree(data); -// Like fd, call PmuClose if pd will not be used. -PmuClose(pd); + ``` * Python examples diff --git a/README.md b/README.md index b3e124a..b7e9406 100644 --- a/README.md +++ b/README.md @@ -107,77 +107,101 @@ Go API文档可以参考GO_API.md: - 获取进程的pmu计数 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(COUNTING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。 -PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - PmuData *d = &data[i]; - ... +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pidList[1]; + pidList[0] = getpid(); + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(COUNTING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + PmuData *d = &data[i]; + std::cout << "evt=" << d->evt << " count=" << d->count << std::endl; + } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - 对进程进行采样 ```C++ -int pidList[1]; -pidList[0] = pid; -char *evtList[1]; -evtList[0] = "cycles"; -// 初始化事件列表,指定需要计数的事件cycles。 -PmuAttr attr = {0}; -attr.evtList = evtList; -attr.numEvt = 1; -attr.pidList = pidList; -attr.numPid = 1; -// 调用PmuOpen,返回pd。pd表示该任务的id。 -int pd = PmuOpen(SAMPLING, &attr); -// 开始采集。 -PmuEnable(pd); -// 采集1秒。 -sleep(1); -// 停止采集。
-PmuDisable(pd); -PmuData *data = NULL; -// 读取PmuData,它是一个数组,长度是len。 -int len = PmuRead(pd, &data); -for (int i = 0; i < len; ++i) { - // 获取数组的一个元素。 - PmuData *d = &data[i]; - // 获取调用栈对象,它是一个链表。 - Stack *stack = d->stack; - while (stack) { - // 获取符号对象。 - if (stack->symbol) { - ... +#include + +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +int main() { + int pid = getpid(); + int pidList[1]; + pidList[0] = pid; + char *evtList[1]; + evtList[0] = "cycles"; + // 初始化事件列表,指定需要计数的事件cycles。 + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 1; + attr.pidList = pidList; + attr.numPid = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; + attr.callStack = 1; + attr.freq = 200; + attr.useFreq = 1; + // 调用PmuOpen,返回pd。pd表示该任务的id。 + int pd = PmuOpen(SAMPLING, &attr); + // 开始采集。 + PmuEnable(pd); + // 采集1秒。 + sleep(1); + // 停止采集。 + PmuDisable(pd); + PmuData *data = NULL; + // 读取PmuData,它是一个数组,长度是len。 + int len = PmuRead(pd, &data); + for (int i = 0; i < len; ++i) { + // 获取数组的一个元素。 + PmuData *d = &data[i]; + // 获取调用栈对象,它是一个链表。 + Stack *stack = d->stack; + while (stack) { + // 获取符号对象。 + if (stack->symbol) { + Symbol *data = stack->symbol; + std::cout << std::hex << data->addr << " " << data->symbolName << "+0x" << data->offset << " " + << data->codeMapAddr << " (" << data->module << ")" + << " (" << std::dec << data->fileName << ":" << data->lineNum << ")" << std::endl; + + } + stack = stack->next; } - stack = stack->next; } + // 释放PmuData。 + PmuDataFree(data); + // 类似fd,当任务结束时调用PmuClose释放资源。 + PmuClose(pd); } -// 释放PmuData。 -PmuDataFree(data); -// 类似fd,当任务结束时调用PmuClose释放资源。 -PmuClose(pd); ``` - Python 例子 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index fe89cc6..4ee4eee 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -25,6 +25,7 @@ int pd = PmuOpen(COUNTING, &attr); # python代码示例 import time import kperf + evtList = ["cycles", "branch-misses"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, 
pmu_attr) @@ -134,15 +135,38 @@ perf record -e cycles,branch-misses 设置PmuAttr的方式和Counting一样,在调用PmuOpen的时候,把任务类型设置为SAMPLING,并且设置采样频率: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +PmuAttr attr = {0}; +char* evtList[1] = {"cycles"}; attr.freq = 1000; // 采样频率是1000HZ attr.useFreq = 1; +attr.evtList = evtList; +attr.numEvt = 1; int pd = PmuOpen(SAMPLING, &attr); +if ( pd == -1) { + printf("kperf pmuopen sample failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("cpu=%d pid=%d tid=%d period=%ld\n", data[i].cpu, data[i].pid, data[i].tid, data[i].period); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym import time + evtList = ["branch-misses", "cycles"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -150,6 +174,16 @@ pmu_attr = kperf.PmuAttr( symbolMode=kperf.SymbolMode.RESOLVE_ELF ) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if pd == -1: + print(f"kperf pmuopen sample failed, expect err is nil, but is {kperf.error()}\n") +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"cpu {item.cpu} pid {item.pid} tid {item.tid} period {item.period}") +kperf.close(pd) ``` ```go @@ -165,6 +199,18 @@ func main() { fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("cpu=%d pid=%d tid=%d period=%v\n", o.Cpu, o.Pid, o.Tid, o.Period) + } + kperf.PmuClose(pd) } ``` @@ -197,23 +243,55 @@ perf record -e arm_spe_0/load_filter=1/ 对于libkperf,可以这样设置PmuAttr: ```c++ // c++代码示例 +#include + +#include "symbol.h" +#include
"pmu.h" +#include "pcerrc.h" + PmuAttr attr = {0}; attr.period = 8192; // 采样周期是8192 attr.dataFilter = LOAD_FILTER; // 设置filter属性为load_filter + +int pd = PmuOpen(SPE_SAMPLING, &attr); +if ( pd == -1) { + printf("kperf pmuopen spe failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto o = data[i]; + printf("spe base info comm=%s, pid=%d, tid=%d, coreId=%d, numaId=%d, socketId=%d\n", o.comm, o.pid, o.tid, o.cpuTopo->coreId, o.cpuTopo->numaId, o.cpuTopo->socketId); + printf("spe ext info pa=%lu, va=%lu, event=%lu, latency=%lu\n", o.ext->pa, o.ext->va, o.ext->event, o.ext->lat); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf +import ksym +import time + pmu_attr = kperf.PmuAttr( - sampleRate = 1000, - symbolMode = kperf.SymbolMode.RESOLVE_ELF, - dataFilter = kperf.SpeFilter.SPE_DATA_ALL, - evFilter = kperf.SpeEventFilter.SPE_EVENT_RETIRED, - minLatency = 0x40 + sampleRate = 8192, + dataFilter = kperf.SpeFilter.LOAD_FILTER, ) # 需要root权限才能运行 pd = kperf.open(kperf.PmuTaskType.SPE_SAMPLING, pmu_attr) + +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) + +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"spe base info comm={item.comm}, pid={item.pid}, tid={item.tid}, coreId={item.cpuTopo.coreId}, numaId={item.cpuTopo.numaId}, socketId={item.cpuTopo.socketId}") + print(f"spe ext info pa={item.ext.pa}, va={item.ext.va}, event={item.ext.event}, latency={item.ext.lat}\n") +kperf.close(pd) ``` ```go @@ -222,12 +300,28 @@ import "libkperf/kperf" import "time" func main() { - attr := kperf.PmuAttr{MinLatency:0x40, SymbolMode: kperf.ELF, SampleRate: 1000, DataFilter: kperf.SPE_DATA_ALL, EvFilter: kperf.SPE_EVENT_RETIRED} + attr := kperf.PmuAttr{SampleRate:8192, DataFilter: kperf.LOAD_FILTER} pd, err := kperf.PmuOpen(kperf.SPE, attr) if err != nil { fmt.Printf("kperf pmuopen spe failed, expect err is nil, but is
%v\n", err) return } + + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + } + + for _, o := range dataVo.GoData { + fmt.Printf("spe base info comm=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, socketId=%v\n", o.Comm, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) + fmt.Printf("spe ext info pa=%v, va=%v, event=%v, latency=%v\n", o.SpeExt.Pa, o.SpeExt.Va, o.SpeExt.Event, o.SpeExt.Lat) + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(pd) } ``` @@ -311,13 +405,35 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("evt=%s, count=%d\n", data[i].evt, data[i].count); +} +PmuClose(pd); ``` + ```python # python代码示例 import kperf +import time + evtList = ["hisi_sccl1_ddrc0/flux_rd/"] pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +pmu_data = kperf.read(pd) +for item in pmu_data.iter: + print(f"evt={item.evt} count={item.count}") +kperf.close(pd) ``` ```go @@ -334,6 +450,18 @@ func main() { fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v\n", err) return } + kperf.PmuEnable(pd) + time.Sleep(time.Second) + kperf.PmuDisable(pd) + dataVo, err := kperf.PmuRead(pd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("evt=%v count=%v \n", o.Evt, o.Count) + } + kperf.PmuClose(pd) } ``` @@ -350,7 +478,7 @@ evtList[0] = "hisi_sccl1_ddrc/flux_rd/"; evtList = ["hisi_sccl1_ddrc/flux_rd/"] ``` -```go +```go //
go代码示例 evtList := []string{"hisi_sccl1_ddrc/flux_rd/"} ``` @@ -380,16 +508,27 @@ libkperf支持tracepoint的采集,支持的tracepoint事件可以通过perf li 可以这样设置PmuAttr: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + char *evtList[1]; evtList[0] = "sched:sched_switch"; PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = 1; +attr.period = 1000; int pd = PmuOpen(SAMPLING, &attr); ``` ```python # python代码示例 +import kperf +import ksym +import time +from ctypes import * + evtList = ["sched:sched_switch"] pmu_attr = kperf.PmuAttr( evtList=evtList, @@ -404,7 +543,6 @@ pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) import "libkperf/kperf" import "fmt" - func main() { evtList := []string{"sched:sched_switch"} attr := kperf.PmuAttr{EvtList:evtList, SymbolMode:kperf.ELF, SampleRate: 1000} @@ -424,10 +562,24 @@ tracepoint能够获取每个事件特有的数据,比如sched:sched_switch包 libkperf提供了接口PmuGetField来获取tracepoint的数据。比如对于sched:sched_switch,可以这样调用: ```c++ // c++代码示例 -int prev_pid; -PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); -char next_comm[16]; -PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + auto pmuData = &data[i]; + int prev_pid; + PmuGetField(pmuData->rawData, "prev_pid", &prev_pid, sizeof(prev_pid)); + char next_comm[16]; + PmuGetField(pmuData->rawData, "next_comm", &next_comm, sizeof(next_comm)); + printf("next_comm=%s;prev_pid=%d\n", next_comm, prev_pid); +} ``` ```python @@ -480,10 +632,10 @@ func main() { var cArray [15]C.char nextErr := v.GetField("next_comm", unsafe.Pointer(&cArray)) if nextErr != nil { - fmt.Printf("get next_comm failed err is%v ",nextErr) + fmt.Printf("get next_comm failed err is%v\n",nextErr) } else { ptr := (*C.char)(unsafe.Pointer(&cArray[0])) - fmt.Printf("next_comm=%v;", 
C.GoString(ptr)) + fmt.Printf("next_comm=%v\n", C.GoString(ptr)) } prevPid := C.int(0) @@ -509,6 +661,11 @@ perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired 比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" +#include "pcerrc.h" + unsigned numEvt = 5; char *evtList[numEvt] = {"cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"}; // 前四个事件是一个分组 @@ -517,12 +674,27 @@ PmuAttr attr = {0}; attr.evtList = evtList; attr.numEvt = numEvt; attr.evtAttr = groupId; + +int pd = PmuOpen(COUNTING, &attr); +if ( pd == -1) { + printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); +} +PmuEnable(pd); +sleep(1); +PmuDisable(pd); +PmuData* data = nullptr; +int len = PmuRead(pd, &data); +for (int i = 0; i < len; i++) { + printf("evt=%s, count=%d cpu=%d\n", data[i].evt, data[i].count, data[i].cpu); +} +PmuClose(pd); ``` ```python # python代码示例 import kperf import time + evtList = ["cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"] # 前四个事件是一个分组 evtAttrList = [1,1,1,1,-1] @@ -535,6 +707,7 @@ pmu_data = kperf.read(pd) pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) for data in pmu_data.iter: print(f"cpu {data.cpu} count {data.count} evt {data.evt}") +kperf.close(pd) ``` ```go @@ -605,6 +778,10 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) 参考代码: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[2]; // DDR读带宽 devAttr[0].metric = PMU_DDR_READ_BW; @@ -627,10 +804,10 @@ for (int i = 0; i < len / 2; ++i) { // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " <<
devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } for (int i = len / 2; i < len; ++i) { - cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; + std::cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -639,6 +816,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) @@ -658,6 +838,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_READ_BW}, kperf.PmuDeviceAttr{Metric: kperf.PMU_DDR_WRITE_BW}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) @@ -705,6 +889,10 @@ libkperf提供了采集L3 cache平均时延的能力,用于分析访存型应 参考代码: ```c++ +#include +#include "symbol.h" +#include "pmu.h" + // c++代码示例 PmuDeviceAttr devAttr[1]; // L3平均时延 @@ -721,7 +909,7 @@ auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); // devData的长度等于cluster个数 for (int i=0;i +#include "symbol.h" +#include "pmu.h" + PmuDeviceAttr devAttr[1]; // 采集PCIE设备RX的读带宽 devAttr[0].metric = PMU_PCIE_RX_MRD_BW; @@ -802,6 +1001,9 @@ PmuDisable(pd); ```python # python代码示例 +import kperf +import time + dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_PCIE_RX_MRD_BW, bdf="16:04.0") ] @@ -817,6 +1019,10 @@ for data in dev_data.iter: ```go // go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_PCIE_RX_MRD_BW, Bdf: "16:04.0"}} fd, _ := kperf.PmuDeviceOpen(deviceAttrs) kperf.PmuEnable(fd) 
@@ -848,6 +1054,10 @@ perf trace -e read,write 比如,可以这样调用: ```c++ // c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + unsigned numFunc = 2; const char *funs1 = "read"; const char *funs2 = "write"; @@ -862,7 +1072,7 @@ PmuTraceDisable(pd); PmuTraceData *data = nullptr; int len = PmuTraceRead(pd, &data); for(int i = 0; i < len; ++i) { - printf("funcName: %s, elspsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm) + printf("funcName: %s, elapsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm); } PmuTraceClose(pd); ``` @@ -871,6 +1081,7 @@ PmuTraceClose(pd); # python代码示例 import kperf import time + funcList = ["read","write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcList) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -930,6 +1141,10 @@ funcName: write elapsedTime: 0.00118 ms pid: 997235 tid: 997235 cpu: 110 comm: t ### 采集BRBE数据 libkperf基于sampling的能力,增加了对branch sample stack数据的采集能力,用于获取CPU的跳转记录, 通过branchSampleFilter可指定获取不同类型的分支跳转记录。 ```c++ +#include +#include "symbol.h" +#include "pmu.h" + char* evtList[1] = {"cycles"}; int* cpuList = nullptr; PmuAttr attr = {0}; @@ -980,6 +1195,7 @@ ffff88f60aa0->ffff88f60618 1 ```python import time +import ksym import kperf evtList = ["cycles"] diff --git a/docs/Go_API.md b/docs/Go_API.md index fd0a346..30a5c3f 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -300,7 +300,7 @@ import "fmt" func main() { syscallList := kperf.PmuSysCallFuncList() if syscallList == nil { - fmt.Printf("sys call list is empty") + fmt.Printf("sys call list is empty\n") } else { for _, funcName := range syscallList { fmt.Printf("func name %v\n", funcName) @@ -478,12 +478,12 @@ import "fmt" func main() { err := kperf.PmuOpenCpuFreqSampling(100) if err != nil { - fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) 
+ fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v\n", err) } freqList := kperf.PmuReadCpuFreqDetail() for _, v := range freqList { - fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d\n", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) } kperf.PmuCloseCpuFreqSampling() @@ -501,7 +501,7 @@ func main() { attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) if err != nil { - fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) return } @@ -511,24 +511,24 @@ func main() { dataVo, err := kperf.PmuRead(fd) if err != nil { - fmt.Printf("kperf pmuread failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) return } for _, o := range dataVo.GoData { if len(o.Symbols) != 0 { - fmt.Printf("expect symbol data is empty, but is not") + fmt.Printf("expect symbol data is empty, but is not\n") } } parseErr := kperf.ResolvePmuDataSymbol(dataVo) if parseErr != nil { - fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v\n", parseErr) } for _, o := range dataVo.GoData { if len(o.Symbols) == 0 { - fmt.Printf("expect symbol data is not empty, but is empty") + fmt.Printf("expect symbol data is not empty, but is empty\n") } } kperf.PmuDataFree(dataVo) diff --git a/docs/Python_API.md b/docs/Python_API.md index a0a1968..2ec007a 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -82,8 +82,10 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) ```python # python代码示例 -import time import kperf +import ksym +import time + evtList = ["cycles", "branch-misses"] pmu_attr = 
kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) @@ -196,6 +198,7 @@ get_field(pmu_data: ImplPmuData, field_name: str, value: c_void_p) ```python import kperf +import ksym import time from ctypes import * @@ -272,8 +275,9 @@ kperf.trace_open(trace_type: kperf.PmuTraceType, pmu_trace_attr: kperf.PmuTraceA ```python # python代码示例 -import time import kperf +import time + funcs = ["read", "write"] pmu_trace_attr = kperf.PmuTraceAttr(funcs=funcs) pd = kperf.trace_open(kperf.PmuTraceType.TRACE_SYS_CALL, pmu_trace_attr) @@ -346,6 +350,8 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 ```python # python代码示例 +import kperf +import time dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) ] @@ -443,6 +449,9 @@ def close_cpu_freq_sampling() 关闭cpu频率采集 def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 ```python #python代码示例 +import kperf +import time + err = kperf.open_cpu_freq_sampling(100) if err != 0: print(f"error number: {kperf.errorno()} error message: {kperf.error()}") @@ -459,6 +468,9 @@ kperf.close_cpu_freq_sampling() def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 ```python #python代码示例 +import kperf +import time + event_name = "cycles" pmu_attr = kperf.PmuAttr( evtList=[event_name], -- Gitee From 8e632572503d2bd12f66b1d5253a040644cebadf Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 12 Jun 2025 16:11:23 +0800 Subject: [PATCH 30/48] =?UTF-8?q?=E5=A2=9E=E5=8A=A0\n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Details_Usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 4ee4eee..68c5f2c 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -1072,7 +1072,7 @@ PmuTraceDisable(pd); PmuTraceData *data = nullptr; int len = PmuTraceRead(pd, &data); for(int i = 
0; i < len; ++i) { - printf("funcName: %s, elapsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm); + printf("funcName: %s, elapsedTime: %f ms pid: %d tid: %d cpu: %d comm: %s\n", data[i].funcs, data[i].elapsedTime, data[i].pid, data[i].tid, data[i].cpu, data[i].comm); } PmuTraceClose(pd); ``` -- Gitee From 4488e2d9a71ff720c10cefe57a47e321bb72fcd2 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 13 Jun 2025 14:18:33 +0800 Subject: [PATCH 31/48] =?UTF-8?q?=E8=A1=A5=E5=85=85go=E8=BF=90=E8=A1=8C?= =?UTF-8?q?=E5=89=8D=E7=BD=AE=E5=91=BD=E4=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 2 ++ README.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.en.md b/README.en.md index 7a344b5..2c472d0 100644 --- a/README.en.md +++ b/README.en.md @@ -314,6 +314,8 @@ python example.py You can directly go to the go/src/libkperf_test directory. 
```shell +export GO111MODULE=off +export LD_LIBRARY_PATH=../libkperf/lib:$LD_LIBRARY_PATH go test -v # run all go test -v -test.run TestCount #specify the test case to run ``` diff --git a/README.md b/README.md index b7e9406..cfa0443 100644 --- a/README.md +++ b/README.md @@ -290,6 +290,8 @@ python example.py * **针对Go示例代码:** 可以直接跳转到 go/src/libkperf_test目录下 ```shell +export GO111MODULE=off +export LD_LIBRARY_PATH=../libkperf/lib:$LD_LIBRARY_PATH go test -v # 全部运行 go test -v -test.run TestCount #指定运行的用例 ``` \ No newline at end of file -- Gitee From 607d068dfa5b89ea070a1cb0ee99a10a3bc059c4 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 6 Jun 2025 15:28:00 +0800 Subject: [PATCH 32/48] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E7=AC=A6=E5=8F=B7?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu_list.cpp | 55 ++++++++++++++++++++++++++++++++++++------------ pmu/sampler.cpp | 9 ++++---- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index d82e33e..071b924 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -294,7 +294,31 @@ namespace KUNPENG_PMU { } } - void HandleBlockData(std::vector& pmuData, std::vector& switchData) + + void SortTwoVector(std::vector& pmuData, std::vector& sampleIps) + { + std::vector> combined; + combined.reserve(pmuData.size()); + for (size_t i = 0; i < pmuData.size(); ++i) { + combined.emplace_back(std::make_pair(std::move(pmuData[i]), std::move(sampleIps[i]))); + } + + std::sort(combined.begin(), combined.end(), + [](std::pair& a, std::pair& b) { + if (a.first.tid == b.first.tid) { + return a.first.ts < b.first.ts; + } + return a.first.tid < b.first.tid; + }); + + for (size_t i = 0; i < pmuData.size(); ++i) { + pmuData[i] = std::move(combined[i].first); + sampleIps[i] = std::move(combined[i].second); + } + } + + void HandleBlockData(std::vector& pmuData, std::vector& sampleIps, + 
SymbolMode symMode,std::vector& switchData) { std::sort(switchData.begin(), switchData.end(), [](const PmuSwitchData& a, const PmuSwitchData& b) { if (a.tid == b.tid) { @@ -305,7 +329,7 @@ namespace KUNPENG_PMU { std::unordered_map> tidToOffTimeStamps; int64_t outTime = 0; int prevTid = -1; - for (const auto& item: switchData) { + for (const auto& item : switchData) { if (item.swOut) { outTime = item.ts; prevTid = item.tid; @@ -323,19 +347,13 @@ namespace KUNPENG_PMU { } } } - - std::sort(pmuData.begin(), pmuData.end(), [](const PmuData& a, const PmuData& b) { - if (a.tid == b.tid) { - return a.ts < b.ts; - } - return a.tid < b.tid; - }); + SortTwoVector(pmuData, sampleIps); int csCnt = 0; int64_t prevTs = 0; int64_t currentTs = 0; int64_t curPeriod = 0; int currentTid = -1; - for (auto& item: pmuData) { + for (auto& item : pmuData) { if (currentTid != item.tid) { currentTid = item.tid; csCnt = 0; @@ -347,7 +365,9 @@ namespace KUNPENG_PMU { if (strcmp(item.evt, "context-switches") == 0) { // Convert stack from 'schedule[kernel] -> futex_wait[kernel] -> ...[kernel] -> lock_wait -> start_thread' // to 'lock_wait -> start_thread', only keeping user stack. - TrimKernelStack(item); + if (symMode != NO_SYMBOL_RESOLVE) { + TrimKernelStack(item); + } // Before the context-switches event, there is only one cycles event, which we need to ignore. 
if (currentTs == 0) { currentTs = item.ts; @@ -667,7 +687,6 @@ namespace KUNPENG_PMU { } auto& eventData = userDataList[iPmuData]; - auto symMode = symModeList[eventData.pd]; for (size_t i = 0; i < eventData.data.size(); ++i) { auto& pmuData = eventData.data[i]; auto& ipsData = eventData.sampleIps[i]; @@ -675,6 +694,15 @@ namespace KUNPENG_PMU { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } + if (GetBlockedSampleState(eventData.pd) == 1) { + for (auto& item : eventData.data) { + if (strcmp(item.evt, "context-switches") == 0) { + // Convert stack from 'schedule[kernel] -> futex_wait[kernel] -> ...[kernel] -> lock_wait -> start_thread' + // to 'lock_wait -> start_thread', only keeping user stack. + TrimKernelStack(item); + } + } + } New(SUCCESS); return SUCCESS; } @@ -760,7 +788,8 @@ namespace KUNPENG_PMU { } else { FillStackInfo(evData); if (GetBlockedSampleState(pd) == 1) { - HandleBlockData(evData.data, evData.switchData); + auto symMode = symModeList[evData.pd]; + HandleBlockData(evData.data, evData.sampleIps, symMode, evData.switchData); } auto inserted = userDataList.emplace(pData, move(evData)); dataList.erase(pd); diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index cab1556..3255aac 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -217,6 +217,7 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; + ips->ips.reserve(ips->ips.size() + sample->nr); // Copy ips from ring buffer and get stack info later. 
if (evt->callStack == 0) { int i = 0; @@ -224,12 +225,12 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( i++; } if (i < sample->nr) { - ips->ips.push_back(sample->ips[i]); + ips->ips.emplace_back(sample->ips[i]); } } else { for (int i = sample->nr - 1; i >= 0; --i) { if (IsValidIp(sample->ips[i])) { - ips->ips.push_back(sample->ips[i]); + ips->ips.emplace_back(sample->ips[i]); } } } @@ -264,7 +265,7 @@ void KUNPENG_PMU::PerfSampler::ReadRingBuffer(vector &data, vectormmap.tid, event->mmap.filename, event->mmap.addr); } else if (symMode == RESOLVE_ELF) { SymResolverUpdateModuleNoDwarf(event->mmap.tid, event->mmap.filename, event->mmap.addr); @@ -272,7 +273,7 @@ void KUNPENG_PMU::PerfSampler::ReadRingBuffer(vector &data, vectormmap2.tid, event->mmap2.filename, event->mmap2.addr); } else if (symMode == RESOLVE_ELF) { SymResolverUpdateModuleNoDwarf(event->mmap2.tid, event->mmap2.filename, event->mmap2.addr); -- Gitee From 230decd119f7a6b64f52540e6f2c68e629bfd4e9 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Mon, 16 Jun 2025 15:06:26 +0800 Subject: [PATCH 33/48] =?UTF-8?q?go=E6=94=AF=E6=8C=81=E9=9D=99=E6=80=81?= =?UTF-8?q?=E7=BC=96=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 5 +++++ README.md | 5 +++++ go/CMakeLists.txt | 7 ++++++- go/src/libkperf/kperf/kperf.go | 3 ++- go/src/libkperf/sym/sym.go | 3 ++- 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/README.en.md b/README.en.md index 2c472d0..7ea9efa 100644 --- a/README.en.md +++ b/README.en.md @@ -319,3 +319,8 @@ export LD_LIBRARY_PATH=../libkperf/lib:$LD_LIBRARY_PATH go test -v # run all go test -v -test.run TestCount #specify the test case to run ``` + +* **GO language static mode compilation:** +```shell +go build -tags="static" +``` \ No newline at end of file diff --git a/README.md b/README.md index cfa0443..20e9427 100644 --- a/README.md +++ b/README.md @@ -294,4 +294,9 @@ export GO111MODULE=off 
export LD_LIBRARY_PATH=../libkperf/lib:$LD_LIBRARY_PATH go test -v # 全部运行 go test -v -test.run TestCount #指定运行的用例 +``` + +* **GO静态模式编译:** +```shell +go build -tags="static" ``` \ No newline at end of file diff --git a/go/CMakeLists.txt b/go/CMakeLists.txt index 05d797e..053cd70 100644 --- a/go/CMakeLists.txt +++ b/go/CMakeLists.txt @@ -2,11 +2,16 @@ project(go) set(TARGET_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/src/libkperf/include) set(TARGET_LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/src/libkperf/lib) +set(TARGET_STATIC_LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/src/libkperf/static_lib) file(MAKE_DIRECTORY ${TARGET_INCLUDE_DIR}) file(MAKE_DIRECTORY ${TARGET_LIB_DIR}) +file(MAKE_DIRECTORY ${TARGET_STATIC_LIB_DIR}) file(GLOB HEAD_FILES "${CMAKE_CURRENT_LIST_DIR}/../include/*.h" "${CMAKE_CURRENT_LIST_DIR}/../symbol/symbol.h") +file(GLOB STATIC_ELF_FILES ${THIRD_PARTY}/local/elfin-parser/libelf++.a ${THIRD_PARTY}/local/elfin-parser/libdwarf++.a) install(FILES ${HEAD_FILES} DESTINATION ${TARGET_INCLUDE_DIR}) -install(TARGETS sym kperf DESTINATION ${TARGET_LIB_DIR}) \ No newline at end of file +install(TARGETS sym kperf DESTINATION ${TARGET_LIB_DIR}) +install(TARGETS sym_static kperf_static DESTINATION ${TARGET_STATIC_LIB_DIR}) +install(FILES ${STATIC_ELF_FILES} DESTINATION ${TARGET_STATIC_LIB_DIR}) \ No newline at end of file diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index b21f304..a27c112 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -17,7 +17,8 @@ package kperf /* #cgo CFLAGS: -I ../include -#cgo LDFLAGS: -L ../lib -lkperf -lsym +#cgo !static LDFLAGS: -L ../lib -lkperf -lsym +#cgo static LDFLAGS: -L ../static_lib -lkperf -lsym -lelf++ -ldwarf++ -lstdc++ -lnuma #include "pmu.h" #include "symbol.h" diff --git a/go/src/libkperf/sym/sym.go b/go/src/libkperf/sym/sym.go index 1d07879..0e83268 100644 --- a/go/src/libkperf/sym/sym.go +++ b/go/src/libkperf/sym/sym.go @@ -17,7 +17,8 @@ package sym /* #cgo CFLAGS: -I 
../include -#cgo LDFLAGS: -L ../lib -lsym +#cgo !static LDFLAGS: -L ../lib -lsym +#cgo static LDFLAGS: -L ../static_lib -lsym -lstdc++ -lelf++ -ldwarf++ #include "symbol.h" #include "pcerrc.h" -- Gitee From b8dd682c16990df8da5fc1a2ec0e756572070fd8 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Mon, 16 Jun 2025 19:13:19 +0800 Subject: [PATCH 34/48] =?UTF-8?q?PmuRead=E5=87=BD=E6=95=B0=E6=80=A7?= =?UTF-8?q?=E8=83=BD=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 8 ++++++++ pmu/pmu_list.cpp | 32 ++++++++++++++++++-------------- pmu/sample_process.cpp | 27 --------------------------- pmu/sample_process.h | 27 ++++++++++++++++++++++++++- pmu/sampler.cpp | 5 +++-- util/common.cpp | 6 ------ util/common.h | 7 ++++++- 7 files changed, 61 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a96e848..f013847 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,14 @@ endif() set(TOP_DIR ${PROJECT_SOURCE_DIR}) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops -fno-plt") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funroll-loops -fno-plt ") + +if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a") +endif() + message("TOP_DIR is ${TOP_DIR}") include(${CMAKE_CURRENT_LIST_DIR}/Common.cmake) add_subdirectory(symbol) diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 071b924..8ea83ac 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -15,6 +15,7 @@ ******************************************************************************/ #include #include +#include #include #include "linked_list.h" #include "cpu_map.h" @@ -297,24 +298,27 @@ namespace KUNPENG_PMU { void SortTwoVector(std::vector& pmuData, std::vector& sampleIps) { - std::vector> combined; - combined.reserve(pmuData.size()); - for (size_t i = 0; i < pmuData.size(); 
++i) { - combined.emplace_back(std::make_pair(std::move(pmuData[i]), std::move(sampleIps[i]))); - } - - std::sort(combined.begin(), combined.end(), - [](std::pair& a, std::pair& b) { - if (a.first.tid == b.first.tid) { - return a.first.ts < b.first.ts; + std::vector indices(pmuData.size()); + std::iota(indices.begin(), indices.end(), 0); + std::stable_sort(indices.begin(), indices.end(), [&pmuData](size_t a, size_t b){ + if (pmuData[a].tid == pmuData[b].tid) { + return pmuData[a].ts < pmuData[b].ts; } - return a.first.tid < b.first.tid; + return pmuData[a].tid < pmuData[b].tid; }); - for (size_t i = 0; i < pmuData.size(); ++i) { - pmuData[i] = std::move(combined[i].first); - sampleIps[i] = std::move(combined[i].second); + std::vector sortedPmuData; + std::vector sortedSampleIps; + size_t size = pmuData.size(); + sortedPmuData.reserve(size); + sortedSampleIps.reserve(size); + + for (size_t i = 0; i < size; ++i) { + sortedPmuData.emplace_back(std::move(pmuData[indices[i]])); + sortedSampleIps.emplace_back(std::move(sampleIps[indices[i]])); } + pmuData = std::move(sortedPmuData); + sampleIps = std::move(sortedSampleIps); } void HandleBlockData(std::vector& pmuData, std::vector& sampleIps, diff --git a/pmu/sample_process.cpp b/pmu/sample_process.cpp index 778b622..5f30eb6 100644 --- a/pmu/sample_process.cpp +++ b/pmu/sample_process.cpp @@ -22,33 +22,6 @@ #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) #define MB() asm volatile("dmb ish" ::: "memory") static constexpr int MAX_DATA_SIZE = 8192; -#ifdef IS_X86 -#define PerfRingbufferSmpStoreRelease(p, v) \ - ({ \ - union { \ - typeof(*p) val; \ - char charHead[1]; \ - } pointerUnion = {.val = (v)}; \ - asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ - }) -#else -#define PerfRingbufferSmpStoreRelease(p, v) \ - ({ \ - union { \ - typeof(*p) val; \ - char charHead[1]; \ - } pointerUnion = {.val = (v)}; \ - asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : 
"memory"); \ - }) -#endif - -void KUNPENG_PMU::PerfMmapConsume(PerfMmap &map) -{ - - __u64 prev = map.prev; - struct perf_event_mmap_page *base = (struct perf_event_mmap_page *)map.base; - PerfRingbufferSmpStoreRelease(&base->data_tail, prev); -} void KUNPENG_PMU::PerfMmapReadDone(PerfMmap &map) { diff --git a/pmu/sample_process.h b/pmu/sample_process.h index 8ad9407..e309c87 100644 --- a/pmu/sample_process.h +++ b/pmu/sample_process.h @@ -18,12 +18,37 @@ #include #include "pmu_event.h" +#ifdef IS_X86 +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#else +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#endif + namespace KUNPENG_PMU { int MmapInit(PerfMmap& sampleMmap); union PerfEvent* ReadEvent(PerfMmap& map); int RingbufferReadInit(PerfMmap& map); - void PerfMmapConsume(PerfMmap& map); + inline void PerfMmapConsume(PerfMmap& map) + { + __u64 prev = map.prev; + struct perf_event_mmap_page *base = (struct perf_event_mmap_page *)map.base; + PerfRingbufferSmpStoreRelease(&base->data_tail, prev); + } void PerfMmapReadDone(PerfMmap& map); } // namespace KUNPENG_PMU diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 3255aac..aa23978 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -229,8 +229,9 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( } } else { for (int i = sample->nr - 1; i >= 0; --i) { - if (IsValidIp(sample->ips[i])) { - ips->ips.emplace_back(sample->ips[i]); + const auto& ip = sample->ips[i]; + if (IsValidIp(ip)) { + ips->ips.emplace_back(ip); } } } diff --git a/util/common.cpp b/util/common.cpp index 67c4a66..c157a5a 100644 --- a/util/common.cpp +++ 
b/util/common.cpp @@ -28,12 +28,6 @@ #include "pcerr.h" #include "common.h" -bool IsValidIp(unsigned long ip) { - return (ip != PERF_CONTEXT_HV && ip != PERF_CONTEXT_KERNEL && ip != PERF_CONTEXT_USER - && ip != PERF_CONTEXT_GUEST && ip != PERF_CONTEXT_GUEST_KERNEL - && ip != PERF_CONTEXT_GUEST_USER && ip != PERF_CONTEXT_MAX); -} - std::string GetRealPath(const std::string filePath) { char resolvedPath[PATH_MAX]; diff --git a/util/common.h b/util/common.h index 77b8a9b..caa52a3 100644 --- a/util/common.h +++ b/util/common.h @@ -31,7 +31,12 @@ const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/"; -bool IsValidIp(unsigned long ip); +inline bool IsValidIp(unsigned long ip) +{ + return (ip != PERF_CONTEXT_HV && ip != PERF_CONTEXT_KERNEL && ip != PERF_CONTEXT_USER + && ip != PERF_CONTEXT_GUEST && ip != PERF_CONTEXT_GUEST_KERNEL + && ip != PERF_CONTEXT_GUEST_USER && ip != PERF_CONTEXT_MAX); +} std::string GetRealPath(const std::string filePath); bool IsValidPath(const std::string& filePath); bool IsDirectory(const std::string& path); -- Gitee From 6028a7370d63f3ec5efc0c16676592c6932d9664 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Tue, 17 Jun 2025 14:53:20 +0800 Subject: [PATCH 35/48] =?UTF-8?q?=E8=A7=A3=E5=86=B3sample=5Fprocess.h=20?= =?UTF-8?q?=E5=9C=A8x86=E4=B8=8A=E7=BC=96=E8=AF=91=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/sample_process.h | 1 + 1 file changed, 1 insertion(+) diff --git a/pmu/sample_process.h b/pmu/sample_process.h index e309c87..f012335 100644 --- a/pmu/sample_process.h +++ b/pmu/sample_process.h @@ -17,6 +17,7 @@ #define PMU_SAMPLE_PROCESS_H #include #include "pmu_event.h" +#include "common.h" #ifdef IS_X86 #define PerfRingbufferSmpStoreRelease(p, v) \ -- Gitee From 9fef7b692b37a1d5137ae83ca00283506eef76f8 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> 
Date: Tue, 17 Jun 2025 17:09:59 +0800 Subject: [PATCH 36/48] fix PmuDumpData error --- python/modules/_libkperf/Pmu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 1fa750c..d1d7ba9 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1659,14 +1659,14 @@ def PmuDumpData(pmuData: PmuData, filepath: str, dumpDwf: int) -> None: int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpDwf); """ c_PmuDumpData = kperf_so.PmuDumpData - c_PmuDumpData.argtypes = [ctypes.POINTER(CtypesPmuData), ctypes.c_uint, ctypes.c_char_p, ctypes] + c_PmuDumpData.argtypes = [ctypes.POINTER(CtypesPmuData), ctypes.c_uint, ctypes.c_char_p, ctypes.c_int] c_PmuDumpData.restype = ctypes.c_int c_len = ctypes.c_uint(pmuData.len) c_filepath = ctypes.c_char_p(filepath.encode(UTF_8)) c_dumpDwf = ctypes.c_int(dumpDwf) - c_PmuDumpData(pmuData.pointer, c_len, c_filepath, c_dumpDwf) + c_PmuDumpData(pmuData.pointer(), c_len, c_filepath, c_dumpDwf) def PmuGetField(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str, value: ctypes.c_void_p, -- Gitee From cdd5ce6276bc9fdabab40b4f82d88384e5332172 Mon Sep 17 00:00:00 2001 From: glx Date: Tue, 17 Jun 2025 19:54:42 +0800 Subject: [PATCH 37/48] Add groupId in PmuData MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PmuData增加groupId字段,groupId与EvtAttr.groupId对应,表示PmuData属于哪个事件分组; 修正perf_counter的enable、disable和reset,只有group leader才能调用这些接口,leader调用后所有分组成员也会同时起作用。如果每个成员事件都调用这些接口,那么每个成员事件采集的时长会不一致。 --- docs/Details_Usage.md | 89 ++++++++++++++++++++++----------- go/src/libkperf/kperf/kperf.go | 2 + include/pmu.h | 3 +- pmu/evt_list.cpp | 1 + pmu/evt_list.h | 8 +-- pmu/perf_counter.cpp | 22 ++++++++ pmu/perf_counter.h | 3 ++ pmu/pmu.cpp | 14 +++--- pmu/pmu_list.cpp | 2 +- pmu/pmu_list.h | 2 +- python/modules/_libkperf/Pmu.py | 38 +++++++++----- 11 files changed, 128 
insertions(+), 56 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 68c5f2c..4cb31e3 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -657,56 +657,60 @@ libkperf提供了事件分组的能力,能够让多个事件同时处于采集 perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired ``` -对于libkperf,可以通过设置PmuAttr的evtAttr字段来设定哪些事件放在一个group内。 -比如,可以这样调用: +如果对多个相关联的事件采集,可以把关联的事件放到一个事件组。比如,计算bad speculation需要用到事件inst_retired,inst_spec和cycles,计算retiring需要用到事件inst_retired和cycles。那么perf应该这样使用: +``` +perf stat -e "{inst_retired,inst_spec,cycles}","{inst_spec,cycles}" +``` +用libkperf可以这样实现: ```c++ -// c++代码示例 -#include -#include "symbol.h" -#include "pmu.h" -#include "pcerrc.h" - -unsigned numEvt = 5; -char *evtList[numEvt] = {"cycles","branch-loads","branch-load-misses","iTLB-loads","inst_retired"}; -// 前四个事件是一个分组 -struct EvtAttr groupId[numEvt] = {1,1,1,1,-1}; +// 指定5个事件,因为inst_retired和cycles会重复出现在多个指标中,所以需要重复指定事件。 +char *evtList[5] = {"inst_retired", "inst_spec", "cycles", "inst_retired", "cycles"}; +// 指定事件分组编号,前三个事件为一组,后两个事件为一组。 +EvtAttr groupId[5] = {1,1,1,2,2}; PmuAttr attr = {0}; attr.evtList = evtList; -attr.numEvt = numEvt; +attr.numEvt = 5; attr.evtAttr = groupId; - int pd = PmuOpen(COUNTING, &attr); -if ( pd == -1) { - printf("kperf pmuopen counting failed, expect err is nil, but is %s\n", Perror()); -} PmuEnable(pd); sleep(1); PmuDisable(pd); -PmuData* data = nullptr; +PmuData *data = nullptr; int len = PmuRead(pd, &data); -for (int i = 0; i < len; i++) { - printf("evt=%s, count=%d evt=%d\n", data[i].evt, data[i].count, data[i].evt); +// 根据分组来聚合数据 +map> evtMap; +for (int i=0;ipmuEvt->name.c_str(); + data[i].groupId = this->groupId; if (data[i].comm == nullptr) { data[i].comm = procTopo->comm; } diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 109dba5..6764d4d 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -54,8 +54,8 @@ public: using ProcPtr = std::shared_ptr; using CpuPtr = std::shared_ptr; EvtList(const SymbolMode &symbolMode, 
std::vector &cpuList, std::vector &pidList, - std::shared_ptr pmuEvt, const int group_id) - : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), group_id(group_id) + std::shared_ptr pmuEvt, const int groupId) + : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt), groupId(groupId) { this->numCpu = this->cpuList.size(); this->numPid = this->pidList.size(); @@ -99,7 +99,7 @@ public: int GetGroupId() const { - return group_id; + return groupId; } int GetBlockedSample() const @@ -121,7 +121,7 @@ private: std::vector pidList; std::vector unUsedPidList; std::shared_ptr pmuEvt; - int group_id; // event group id + int groupId; // event group id std::vector>> xyCounterArray; std::shared_ptr MapPmuAttr(int cpu, int pid, PmuEvt* pmuEvent); unsigned int numCpu = 0; diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index bf801b0..7a212fe 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -137,6 +137,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } } + this->groupFd = groupFd; DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", attr.type, cpu, attr.config, attr.config1, attr.config2, this->fd, groupFd); if (__glibc_unlikely(this->fd < 0)) { @@ -150,6 +151,11 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou */ int KUNPENG_PMU::PerfCounter::Enable() { + if (groupFd != -1) { + // Only group leader should use ioctl to enable, disable or reset, + // otherwise each event in the group will be collected for different durations. 
+ return SUCCESS; + } int err = PerfEvt::Enable(); if (err != SUCCESS) { return err; @@ -159,3 +165,19 @@ int KUNPENG_PMU::PerfCounter::Enable() this->running = 0; return SUCCESS; } + +int KUNPENG_PMU::PerfCounter::Disable() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Disable(); +} + +int KUNPENG_PMU::PerfCounter::Reset() +{ + if (groupFd != -1) { + return SUCCESS; + } + return PerfEvt::Reset(); +} \ No newline at end of file diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index 31280c7..585bf58 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -40,12 +40,15 @@ namespace KUNPENG_PMU { std::vector &extPool, std::vector &swtichData) override; int MapPerfAttr(const bool groupEnable, const int groupFd) override; int Enable() override; + int Disable() override; + int Reset() override; private: // Accumulated pmu count, time enabled and time running. __u64 count = 0; __u64 enabled = 0; __u64 running = 0; + int groupFd = 0; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 4ffd5d7..30de9ef 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -270,12 +270,12 @@ static void CopyAttrData(PmuAttr* newAttr, PmuAttr* inputAttr, enum PmuTaskType newAttr->evtList = newEvtList; newAttr->numEvt = inputAttr->numEvt; - // If the event group ID is not enabled, set the group_id to -1. It indicates that the event is not grouped. + // If the event group ID is not enabled, set the groupId to -1. It indicates that the event is not grouped. if ((collectType == SAMPLING || collectType == COUNTING) && inputAttr->evtAttr == nullptr) { struct EvtAttr *evtAttr = new struct EvtAttr[newAttr->numEvt]; // handle event group id. -1 means that it doesn't run event group feature. 
for (int i = 0; i < newAttr->numEvt; ++i) { - evtAttr[i].group_id = -1; + evtAttr[i].groupId = -1; } newAttr->evtAttr = evtAttr; } @@ -289,13 +289,13 @@ static bool FreeEvtAttr(struct PmuAttr *attr) bool flag = false; int notGroupId = -1; for (int i = 0; i < attr->numEvt; ++i) { - if (attr->evtAttr[i].group_id != notGroupId ) { + if (attr->evtAttr[i].groupId != notGroupId ) { flag = true; break; } } - // when the values of group_id are all -1, the applied memory is released. + // when the values of groupId are all -1, the applied memory is released. if (!flag) { delete[] attr->evtAttr; attr->evtAttr = nullptr; @@ -831,7 +831,7 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } -static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) +static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int groupId) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); /** @@ -868,7 +868,7 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att */ PrepareCpuList(attr, taskParam.get(), pmuEvt); - taskParam->group_id = group_id; + taskParam->groupId = groupId; taskParam->pmuEvt = shared_ptr(pmuEvt, PmuEvtFree); taskParam->pmuEvt->useFreq = attr->useFreq; @@ -890,7 +890,7 @@ struct PmuTaskAttr* AssignPmuTaskParam(enum PmuTaskType collectType, struct PmuA return taskParam; } for (int i = 0; i < attr->numEvt; i++) { - struct PmuTaskAttr* current = AssignTaskParam(collectType, attr, attr->evtList[i], attr->evtAttr[i].group_id); + struct PmuTaskAttr* current = AssignTaskParam(collectType, attr, attr->evtList[i], attr->evtAttr[i].groupId); if (current == nullptr) { return nullptr; } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 8ea83ac..96ea5df 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -150,7 +150,7 @@ namespace KUNPENG_PMU { } fdNum += CalRequireFd(cpuTopoList.size(), 
procTopoList.size(), taskParam->pmuEvt->collectType); std::shared_ptr evtList = - std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->group_id); + std::make_shared(GetSymbolMode(pd), cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt, pmuTaskAttrHead->groupId); needBytesNum += PredictRequiredMemory(taskParam->pmuEvt->collectType, cpuTopoList.size(), procTopoList.size()); evtList->SetBranchSampleFilter(GetBranchSampleFilter(pd)); InsertEvtList(pd, evtList); diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 523e2ad..99539d2 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -34,7 +34,7 @@ struct PmuTaskAttr { // list length has to be as the same as numPid std::shared_ptr pmuEvt; // which pmu to be collected - int group_id; // event group id + int groupId; // event group id struct PmuTaskAttr* next; // next task attribute }; diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index d1d7ba9..ce4d09c 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -28,32 +28,32 @@ class SampleRateUnion(ctypes.Union): class CtypesEvtAttr(ctypes.Structure): """ struct EvtAttr { - int group_id; + int groupId; }; """ - _fields_ = [('group_id', ctypes.c_int)] + _fields_ = [('groupId', ctypes.c_int)] - def __init__(self, group_id: int=0, *args: Any, **kw: Any) -> None: + def __init__(self, groupId: int=0, *args: Any, **kw: Any) -> None: super().__init__(*args, **kw) - self.group_id = ctypes.c_int(group_id) + self.groupId = ctypes.c_int(groupId) class EvtAttr: __slots__ = ['__c_evt_attr'] - def __init__(self, group_id: int=0) -> None: - self.__c_evt_attr = CtypesEvtAttr(group_id) + def __init__(self, groupId: int=0) -> None: + self.__c_evt_attr = CtypesEvtAttr(groupId) @property def c_evt_attr(self) -> CtypesEvtAttr: return self.__c_evt_attr @property - def group_id(self) -> int: - return int(self.c_evt_attr.group_id) + def groupId(self) -> int: + return 
int(self.c_evt_attr.groupId) - @group_id.setter - def group_id(self, group_id: int) -> None: - self.c_evt_attr.group_id = ctypes.c_int(group_id) + @groupId.setter + def groupId(self, groupId: int) -> None: + self.c_evt_attr.groupId = ctypes.c_int(groupId) @classmethod def from_c_evt_attr(cls, c_evt_attr: CtypesEvtAttr) -> 'EvtAttr': @@ -1115,7 +1115,8 @@ class CtypesPmuData(ctypes.Structure): int64_t ts; // time stamp. unit: ns pid_t pid; // process id int tid; // thread id - unsigned cpu; // cpu id + int cpu; // cpu id + int groupId; // id for group event struct CpuTopology *cpuTopo; // cpu topology const char *comm; // process command uint64_t period; // number of Samples @@ -1132,6 +1133,7 @@ class CtypesPmuData(ctypes.Structure): ('pid', ctypes.c_int), ('tid', ctypes.c_int), ('cpu', ctypes.c_int), + ('groupId', ctypes.c_int), ('cpuTopo', ctypes.POINTER(CtypesCpuTopology)), ('comm', ctypes.c_char_p), ('period', ctypes.c_uint64), @@ -1148,6 +1150,7 @@ class CtypesPmuData(ctypes.Structure): pid: int=0, tid: int=0, cpu: int=0, + groupId: int=0, cpuTopo: CtypesCpuTopology=None, comm: str='', period: int=0, @@ -1164,6 +1167,7 @@ class CtypesPmuData(ctypes.Structure): self.pid = ctypes.c_int(pid) self.tid = ctypes.c_int(tid) self.cpu = ctypes.c_int(cpu) + self.groupId = ctypes.c_int(groupId) self.cpuTopo = cpuTopo self.comm = ctypes.c_char_p(comm.encode(UTF_8)) self.period = ctypes.c_uint64(period) @@ -1183,6 +1187,7 @@ class ImplPmuData: pid: int=0, tid: int=0, cpu: int=0, + groupId: int=0, cpuTopo: CpuTopology=None, comm: str='', period: int=0, @@ -1197,6 +1202,7 @@ class ImplPmuData: pid=pid, tid=tid, cpu=cpu, + groupId=groupId, cpuTopo=cpuTopo.c_cpu_topo if cpuTopo else None, comm=comm, period=period, @@ -1258,6 +1264,14 @@ class ImplPmuData: def cpu(self, cpu: int) -> None: self.c_pmu_data.cpu = ctypes.c_int(cpu) + @property + def groupId(self) -> int: + return self.c_pmu_data.groupId + + @groupId.setter + def groupId(self, groupId: int) -> None: + 
self.c_pmu_data.groupId = ctypes.c_int(groupId) + @property def cpuTopo(self) -> CpuTopology: return CpuTopology.from_c_cpu_topo(self.c_pmu_data.cpuTopo.contents) if self.c_pmu_data.cpuTopo else None -- Gitee From cb501f6ecf39c7fa93b6ec3536cab02b42c2b243 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 18 Jun 2025 16:52:13 +0800 Subject: [PATCH 38/48] Add HHA uncore event --- docs/Details_Usage.md | 96 ++++++++++++++++++++++++++ docs/Go_API.md | 2 + docs/Python_API.md | 4 +- go/src/libkperf/kperf/kperf.go | 18 +++-- include/pmu.h | 16 +++-- pmu/pmu_metric.cpp | 121 +++++++++++++++++++++++++-------- python/modules/kperf/pmu.py | 18 +++-- python/tests/test_metric.py | 22 ++++++ test/test_perf/test_metric.cpp | 25 +++++++ 9 files changed, 276 insertions(+), 46 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 4cb31e3..7fc3ef5 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -1072,6 +1072,102 @@ kperf.PmuClose(fd) pcie bw(16:04.0): 124122412 Bytes/ns ``` +### 采集跨numa/跨socket访问HHA比例 +libkperf提供了采集跨numa/跨socket访问HHA的操作比例的能力,用于分析访存型应用的性能瓶颈,采集以numa为粒度。 + +参考代码: +```c++ +// c++代码示例 +#include +#include "symbol.h" +#include "pmu.h" + +PmuDeviceAttr devAttr[2]; +// 采集跨numa访问HHA的操作比例 +devAttr[0].metric = PMU_HHA_CROSS_NUMA; +// 采集跨socket访问HHA的操作比例 +devAttr[1].metric = PMU_HHA_CROSS_SOCKET; +// 初始化采集任务 +int pd = PmuDeviceOpen(devAttr, 2); +// 开始采集 +PmuEnable(pd); +sleep(1); +PmuData *oriData = nullptr; +int oriLen = PmuRead(pd, &oriData); +PmuDeviceData *devData = nullptr; +auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); +// devData的长度等于设备numa的个数 +for (int i = 0; i < len / 2; ++i) { + cout << "HHA cross-numa operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count<< "\n"; +} +for (int i = len / 2; i < len; ++i) { + cout << "HHA cross-socket operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count<< "\n"; +} +DevDataFree(devData); +PmuDataFree(oriData); 
+PmuDisable(pd); +``` + +```python +# python代码示例 +import kperf +import time + +dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET) +] +pd = kperf.device_open(dev_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +ori_data = kperf.read(pd) +dev_data = kperf.get_device_metric(ori_data, dev_attr) +for data in dev_data.iter: + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA: + print(f"HHA cross-numa operations ratio (Numa: {data.numaId}): {data.count}") + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET: + print(f"HHA cross-socket operations ratio (Numa: {data.numaId}): {data.count}") +``` + +```go +// go代码用例 +import "libkperf/kperf" +import "fmt" +import "time" + +deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_NUMA}, kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_SOCKET}} +fd, _ := kperf.PmuDeviceOpen(deviceAttrs) +kperf.PmuEnable(fd) +time.Sleep(1 * time.Second) +kperf.PmuDisable(fd) +dataVo, _ := kperf.PmuRead(fd) +deivceDataVo, _ := kperf.PmuGetDevMetric(dataVo, deviceAttrs) +for _, v := range deivceDataVo.GoDeviceData { + if v.Metric == kperf.PMU_HHA_CROSS_NUMA { + fmt.Printf("HHA cross-numa operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } + if v.Metric == kperf.PMU_HHA_CROSS_SOCKET { + fmt.Printf("HHA cross-socket operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } +} +kperf.DevDataFree(deivceDataVo) +kperf.PmuDataFree(dataVo) +kperf.PmuClose(fd) +``` + +执行上述代码,输出的结果类似如下: +``` +HHA cross-numa operations ratio (Numa: 0): 0.438888 +HHA cross-numa operations ratio (Numa: 1): 0.0248052 +HHA cross-numa operations ratio (Numa: 2): 0.0277224 +HHA cross-numa operations ratio (Numa: 3): 0.181404 +HHA cross-socket operations ratio (Numa: 0): 0.999437 +HHA cross-socket operations ratio (Numa: 1): 0.0253748 +HHA cross-socket operations ratio (Numa: 2): 0.329864 +HHA cross-socket 
operations ratio (Numa: 3): 0.18956 +``` + ### 采集系统调用函数耗时信息 libkperf基于tracepoint事件采集能力,在原有能力的基础上,重新封装了一组相关的调用API,来提供采集系统调用函数耗时信息的能力,类似于perf trace命令 diff --git a/docs/Go_API.md b/docs/Go_API.md index 30a5c3f..33d7a7c 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -346,6 +346,8 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * Bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int和error,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 diff --git a/docs/Python_API.md b/docs/Python_API.md index 2ec007a..ffebcb2 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -339,12 +339,14 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int类型,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 62fe88a..126edd9 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -246,12 +246,12 @@ var ( // PmuDeviceMetric var ( - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. 
+ // Collect ddr read bandwidth for each channel. // Unit: Bytes/s PMU_DDR_READ_BW C.enum_PmuDeviceMetric = C.PMU_DDR_READ_BW - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes/s PMU_DDR_WRITE_BW C.enum_PmuDeviceMetric = C.PMU_DDR_WRITE_BW // Percore metric. @@ -266,8 +266,8 @@ var ( // Collect L3 total reference count, including miss and hit count. // Unit: count PMU_L3_REF C.enum_PmuDeviceMetric = C.PMU_L3_REF - // Pernuma metric. - // Collect L3 total latency for each numa node. + // Percluster metric. + // Collect L3 total latency for each cluster node. // Unit: cycles PMU_L3_LAT C.enum_PmuDeviceMetric = C.PMU_L3_LAT // Collect pcie rx bandwidth. @@ -285,6 +285,12 @@ var ( // Collect smmu address transaction. // Unit: count PMU_SMMU_TRAN C.enum_PmuDeviceMetric = C.PMU_SMMU_TRAN + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_NUMA + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_SOCKET ) // PmuBdfType diff --git a/include/pmu.h b/include/pmu.h index 3d10fd4..0873c0c 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -420,12 +420,12 @@ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *valu struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName); enum PmuDeviceMetric { - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes PMU_DDR_READ_BW, - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes PMU_DDR_WRITE_BW, // Percore metric. @@ -458,7 +458,13 @@ enum PmuDeviceMetric { // Perpcie metric. 
// Collect smmu address transaction. // Unit: count - PMU_SMMU_TRAN + PMU_SMMU_TRAN, + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA, + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET }; struct PmuDeviceAttr { diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 66c70c4..1fe04d7 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -84,11 +84,13 @@ namespace KUNPENG_PMU { {PmuDeviceMetric::PMU_PCIE_RX_MWR_BW, "PMU_PCIE_RX_MWR_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MRD_BW, "PMU_PCIE_TX_MRD_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MWR_BW, "PMU_PCIE_TX_MWR_BW"}, - {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"} + {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"}, + {PmuDeviceMetric::PMU_HHA_CROSS_NUMA, "PMU_HHA_CROSS_NUMA"}, + {PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, "PMU_HHA_CROSS_SOCKET"}, }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_L3_LAT}; + set pernumaMetric = {PMU_HHA_CROSS_NUMA, PMU_HHA_CROSS_SOCKET}; set perClusterMetric = {PMU_L3_LAT}; set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, @@ -269,6 +271,30 @@ namespace KUNPENG_PMU { 2 } }; + + PMU_METRIC_PAIR HHA_CROSS_NUMA = { + PmuDeviceMetric::PMU_HHA_CROSS_NUMA, + { + "hisi_sccl", + "hha", + {"0x0", "0x02"}, + "", + "", + 0 + } + }; + + PMU_METRIC_PAIR HHA_CROSS_SOCKET = { + PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, + { + "hisi_sccl", + "hha", + {"0x0", "0x01"}, + "", + "", + 0 + } + }; } static const map HIP_A_UNCORE_METRIC_MAP { @@ -278,6 +304,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::L3_MISS, METRIC_CONFIG::L3_REF, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; static const map HIP_B_UNCORE_METRIC_MAP { @@ -292,6 +320,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::PCIE_TX_MRD_BW, METRIC_CONFIG::PCIE_TX_MWR_BW, METRIC_CONFIG::SMMU_TRAN, + 
METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; const UNCORE_METRIC_MAP UNCORE_METRIC_CONFIG_MAP = { @@ -852,7 +882,7 @@ namespace KUNPENG_PMU { } // remove duplicate device attribute - static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr, bool l3ReDup) + static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr) { std::unordered_set uniqueSet; for (int i = 0; i < len; ++i) { @@ -864,17 +894,6 @@ namespace KUNPENG_PMU { } if (uniqueSet.find(key) == uniqueSet.end()) { - // when in deviceopen remove the same PMU_L3_TRAFFIC and PMU_L3_REF, - // but when getDevMetric we need to keep them. - if (l3ReDup == true && - (attr[i].metric == PmuDeviceMetric::PMU_L3_TRAFFIC || attr[i].metric == PmuDeviceMetric::PMU_L3_REF)) { - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_TRAFFIC)) != uniqueSet.end()) { - continue; - } - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_REF)) != uniqueSet.end()) { - continue; - } - } uniqueSet.insert(key); deviceAttr.emplace_back(attr[i]); } @@ -957,6 +976,9 @@ namespace KUNPENG_PMU { case PMU_PCIE_TX_MWR_BW: case PMU_SMMU_TRAN: return PMU_METRIC_BDF; + case PMU_HHA_CROSS_NUMA: + case PMU_HHA_CROSS_SOCKET: + return PMU_METRIC_NUMA; } return PMU_METRIC_INVALID; } @@ -977,25 +999,57 @@ namespace KUNPENG_PMU { int AggregateByNuma(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { - map devDataByNuma; + const auto& deviceConfig = GetDeviceMtricConfig(); + const auto& findConfig = deviceConfig.find(metric); + if (findConfig == deviceConfig.end()) { + return SUCCESS; + } + auto &evts = findConfig->second.events; + if (evts.size() != 2) { + return SUCCESS; + } + // Event name for total access count. + string totalEvt = evts[0]; + // Event name for cross-numa/cross-socket count. + string crossEvt = evts[1]; + // Sort data by numa, and then sort by event string. 
+ map> devDataByNuma; for (auto &data : rawData) { + string devName; + string evtName; + if (!GetDeviceName(data.evtName, devName, evtName)) { + continue; + } + auto evtConfig = ExtractEvtStr("config", evtName); auto findData = devDataByNuma.find(data.numaId); if (findData == devDataByNuma.end()) { - PmuDeviceData outData; - outData.metric = data.metric; - outData.count = data.count; - outData.mode = GetMetricMode(data.metric); - outData.numaId = data.numaId; - devDataByNuma[data.numaId] = outData; + devDataByNuma[data.numaId][evtConfig] = data; } else { - findData->second.count += data.count; + devDataByNuma[data.numaId][evtConfig].count += data.count; } } for (auto &data : devDataByNuma) { - devData.push_back(data.second); + // Get events of cross-numa/cross-socket access count and total access count. + auto findcrossData = data.second.find(crossEvt); + auto findtotalData = data.second.find(totalEvt); + if (findcrossData == data.second.end() || findtotalData == data.second.end()) { + continue; + } + // Compute ratio: cross access count / total access count + double ratio = 0.0; + if (findtotalData->second.count != 0) { + ratio = (double)(findcrossData->second.count) / findtotalData->second.count; + } else { + ratio = -1; + } + PmuDeviceData outData; + outData.metric = metric; + outData.count = ratio; + outData.mode = GetMetricMode(metric); + outData.numaId = data.first; + devData.push_back(outData); } - return SUCCESS; } @@ -1264,6 +1318,8 @@ namespace KUNPENG_PMU { {PMU_PCIE_TX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_TX_MWR_BW, PcieBWAggregate}, {PMU_SMMU_TRAN, SmmuTransAggregate}, + {PMU_HHA_CROSS_NUMA, AggregateByNuma}, + {PMU_HHA_CROSS_SOCKET, AggregateByNuma}, }; static bool IsMetricEvent(const string &devName, const string &evtName, const PmuDeviceAttr &devAttr) @@ -1366,7 +1422,7 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } - if 
(perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + if (perChannelMetric.find(devAttr.metric) != perChannelMetric.end()) { devData.ddrNumaId = pmuData[i].cpuTopo->numaId; devData.socketId = pmuData[i].cpuTopo->socketId; } @@ -1454,7 +1510,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) } // Remove duplicate device attributes. vector deviceAttr; - if (RemoveDupDeviceAttr(attr, len, deviceAttr, true) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, len, deviceAttr) != SUCCESS) { return -1; } vector configEvtList; @@ -1466,8 +1522,17 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) configEvtList.insert(configEvtList.end(), temp.begin(), temp.end()); } - vector evts; + //remove the same event of PMU_L3_TRAFFIC and PMU_L3_REF, PMU_HHA_CROSS_NUMA and PMU_HHA_CROSS_SOCKET + unordered_set tmpEvents; + vector filteredEvtList; for (auto& evt : configEvtList) { + if (tmpEvents.find(evt) == tmpEvents.end()) { + tmpEvents.insert(evt); + filteredEvtList.push_back(evt); + } + } + vector evts; + for (auto& evt : filteredEvtList) { evts.push_back(const_cast(evt.c_str())); } @@ -1519,7 +1584,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, } // Remove duplicate device attributes. vector deviceAttr; - if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr, false) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr) != SUCCESS) { return -1; } // Filter pmuData by metric and generate InnerDeviceData, diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 2f0a2b5..213125d 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -107,12 +107,12 @@ class SymbolMode: RESOLVE_ELF_DWARF = 2 # Resolve elf and dwarf. All fields in Symbol will be valid. class PmuDeviceMetric: - # Pernuma metric. - # Collect ddr read bandwidth for each numa node. + # Perchannel metric. + # Collect ddr read bandwidth for each channel. # Unit: Bytes/s PMU_DDR_READ_BW = 0 - # Pernuma metric. 
- # Collect ddr write bandwidth for each numa node. + # Perchannel metric. + # Collect ddr write bandwidth for each channel. # Unit: Bytes/s PMU_DDR_WRITE_BW = 1 # Percore metric. @@ -127,8 +127,8 @@ class PmuDeviceMetric: # Collect L3 total reference count, including miss and hit count. # Unit: count PMU_L3_REF = 4 - # Pernuma metric. - # Collect L3 total latency for each numa node. + # Percluster metric. + # Collect L3 total latency for each cluster node. # Unit: cycles PMU_L3_LAT = 5 # Collect pcie rx bandwidth. @@ -146,6 +146,12 @@ class PmuDeviceMetric: # Collect smmu address transaction. # Unit: count PMU_SMMU_TRAN = 10 + # Pernuma metric. + # Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA = 11 + # Pernuma metric. + # Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET = 12 class PmuDeviceAttr(_libkperf.PmuDeviceAttr): """ diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index bf653ce..90c254b 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -256,6 +256,28 @@ def test_get_metric_smmu_transaction(): print_dev_data_details(dev_data) kperf.close(pd) +def test_collect_hha_cross(): + dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA) + ] + pd = kperf.device_open(dev_attr) + print(kperf.error()) + assert pd != -1, f"Expected non-negative pd, but got {pd}" + kperf.enable(pd) + time.sleep(1) + kperf.disable(pd) + ori_data = kperf.read(pd) + assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" + + dev_data = kperf.get_device_metric(ori_data, dev_attr) + assert dev_data[0].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[-1].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA + assert dev_data[-1].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA 
+ print_dev_data_details(dev_data) + kperf.close(pd) + if __name__ == '__main__': # 提示用户使用pytest 运行测试文件 print("This is a pytest script. Run it using the 'pytest' command.") diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index d10ca39..68710cb 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -309,4 +309,29 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); +} + +TEST_F(TestMetric, GetMetricHHACross) +{ + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = PMU_HHA_CROSS_NUMA; + devAttr[1].metric = PMU_HHA_CROSS_SOCKET; + int pd = PmuDeviceOpen(devAttr, 2); + ASSERT_NE(pd, -1); + PmuEnable(pd); + sleep(1); + PmuDisable(pd); + PmuData* oriData = nullptr; + int oriLen = PmuRead(pd, &oriData); + ASSERT_NE(oriLen, -1); + + PmuDeviceData *devData = nullptr; + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + ASSERT_EQ(devData[0].metric, PMU_HHA_CROSS_NUMA); + ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); + ASSERT_EQ(devData[len - 1].metric, PMU_HHA_CROSS_SOCKET); + ASSERT_EQ(devData[len - 1].mode, PMU_METRIC_NUMA); + DevDataFree(devData); + PmuDataFree(oriData); + PmuClose(pd); } \ No newline at end of file -- Gitee From a92bda188a714b255330c8ba003600d16b320d04 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 18 Jun 2025 17:17:28 +0800 Subject: [PATCH 39/48] =?UTF-8?q?fix=E5=8D=95=E8=AF=8D=E6=8B=BC=E5=86=99?= =?UTF-8?q?=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Python_API.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Python_API.md b/docs/Python_API.md index ffebcb2..df07e7c 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -392,8 +392,8 @@ kperf.get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) 对 kperf.device_bdf_list(bdf_type: PmuBdfType): 查找当前系统pcie指标中有效的bdf列表和smmu指标中的有效bdf列表 
-* calss PmuBdfType: - PMU_BDF_TYPE_PCIE: pice指标类型 +* class PmuBdfType: + PMU_BDF_TYPE_PCIE: pcie指标类型 PMU_BDF_TYPE_SMMU: smmu指标类型 * 返回数据iterator[str],可通过for循环遍历该单元 以下是kperf.device_bdf_list示例 -- Gitee From a103d62a1dc9ee424f17f2f6cd6f8460cfd8ccbd Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 19 Jun 2025 21:44:37 +0800 Subject: [PATCH 40/48] adapt python2.7 --- python/modules/_libkperf/Config.py | 6 +- python/modules/_libkperf/Perror.py | 8 +- python/modules/_libkperf/Pmu.py | 742 ++++++++++++++--------------- python/modules/_libkperf/Symbol.py | 282 +++++------ python/modules/kperf/perror.py | 8 +- python/modules/kperf/pmu.py | 96 ++-- python/modules/ksym/symbol.py | 44 +- 7 files changed, 592 insertions(+), 594 deletions(-) diff --git a/python/modules/_libkperf/Config.py b/python/modules/_libkperf/Config.py index aef5f19..19d545a 100644 --- a/python/modules/_libkperf/Config.py +++ b/python/modules/_libkperf/Config.py @@ -23,16 +23,16 @@ VERSION = '1.0' UTF_8 = 'utf-8' -def lib_path() -> str: +def lib_path(): return os.path.dirname(os.path.abspath(__file__)) -def libsym_path() -> str: +def libsym_path(): libsym = 'libsym.so' return os.path.join(lib_path(), libsym) -def libkperf_path() -> str: +def libkperf_path(): libkperf = 'libkperf.so' return os.path.join(lib_path(), libkperf) diff --git a/python/modules/_libkperf/Perror.py b/python/modules/_libkperf/Perror.py index b2e474d..ba65041 100644 --- a/python/modules/_libkperf/Perror.py +++ b/python/modules/_libkperf/Perror.py @@ -17,7 +17,7 @@ import ctypes from .Config import UTF_8, sym_so -def Perrorno() -> int: +def Perrorno(): """ int Perrorno(); """ @@ -28,7 +28,7 @@ def Perrorno() -> int: return c_Perrorno() -def Perror() -> str: +def Perror(): """ const char* Perror(); """ @@ -39,7 +39,7 @@ def Perror() -> str: return c_Perror().decode(UTF_8) -def GetWarn() -> int: +def GetWarn(): """ int GetWarn(); """ @@ -50,7 +50,7 @@ def GetWarn() -> int: return c_GetWarn() -def GetWarnMsg() -> str: 
+def GetWarnMsg(): """ const char* GetWarnMsg(); """ diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index ce4d09c..3025230 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -33,30 +33,30 @@ class CtypesEvtAttr(ctypes.Structure): """ _fields_ = [('groupId', ctypes.c_int)] - def __init__(self, groupId: int=0, *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + def __init__(self, groupId=0, *args, **kw): + super(CtypesEvtAttr, self).__init__(*args, **kw) self.groupId = ctypes.c_int(groupId) class EvtAttr: __slots__ = ['__c_evt_attr'] - def __init__(self, groupId: int=0) -> None: + def __init__(self, groupId=0): self.__c_evt_attr = CtypesEvtAttr(groupId) @property - def c_evt_attr(self) -> CtypesEvtAttr: + def c_evt_attr(self): return self.__c_evt_attr @property - def groupId(self) -> int: + def groupId(self): return int(self.c_evt_attr.groupId) @groupId.setter - def groupId(self, groupId: int) -> None: + def groupId(self, groupId): self.c_evt_attr.groupId = ctypes.c_int(groupId) @classmethod - def from_c_evt_attr(cls, c_evt_attr: CtypesEvtAttr) -> 'EvtAttr': + def from_c_evt_attr(cls, c_evt_attr): evt_attr = cls() evt_attr.__c_evt_attr = c_evt_attr return evt_attr @@ -115,24 +115,24 @@ class CtypesPmuAttr(ctypes.Structure): ] def __init__(self, - evtList: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None, - evtAttr: List[int]=None, - sampleRate: int=0, - useFreq: bool=False, - excludeUser: bool=False, - excludeKernel: bool=False, - symbolMode: int=0, - callStack: bool=False, - blockedSample: bool=False, - dataFilter: int=0, - evFilter: int=0, - minLatency: int=0, - includeNewFork: bool=False, - branchSampleFilter: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + evtList=None, + pidList=None, + cpuList=None, + evtAttr=None, + sampleRate=0, + useFreq=False, + excludeUser=False, + excludeKernel=False, + symbolMode=0, + callStack=False, + 
blockedSample=False, + dataFilter=0, + evFilter=0, + minLatency=0, + includeNewFork=False, + branchSampleFilter=0, + *args, **kw): + super(CtypesPmuAttr, self).__init__(*args, **kw) if evtList: numEvt = len(evtList) @@ -183,26 +183,26 @@ class CtypesPmuAttr(ctypes.Structure): self.includeNewFork = includeNewFork -class PmuAttr: +class PmuAttr(object): __slots__ = ['__c_pmu_attr'] def __init__(self, - evtList: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None, - evtAttr: List[CtypesEvtAttr]=None, - sampleRate: int=0, - useFreq: bool=False, - excludeUser: bool=False, - excludeKernel: bool=False, - symbolMode: int=0, - callStack: bool=False, - blockedSample: bool=False, - dataFilter: int=0, - evFilter: int=0, - minLatency: int=0, - includeNewFork: bool=False, - branchSampleFilter: int=0) -> None: + evtList=None, + pidList=None, + cpuList=None, + evtAttr=None, + sampleRate=0, + useFreq=False, + excludeUser=False, + excludeKernel=False, + symbolMode=0, + callStack=False, + blockedSample=False, + dataFilter=0, + evFilter=0, + minLatency=0, + includeNewFork=False, + branchSampleFilter=0): self.__c_pmu_attr = CtypesPmuAttr( evtList=evtList, pidList=pidList, @@ -223,19 +223,19 @@ class PmuAttr: ) @property - def c_pmu_attr(self) -> CtypesPmuAttr: + def c_pmu_attr(self): return self.__c_pmu_attr @property - def numEvt(self) -> int: + def numEvt(self): return self.c_pmu_attr.numEvt @property - def evtList(self) -> List[str]: + def evtList(self): return [self.c_pmu_attr.evtList[i].decode(UTF_8) for i in range(self.numEvt)] @evtList.setter - def evtList(self, evtList: List[str]) -> None: + def evtList(self, evtList): if evtList: numEvt = len(evtList) self.c_pmu_attr.evtList = (ctypes.c_char_p * numEvt)(*[evt.encode(UTF_8) for evt in evtList]) @@ -245,15 +245,15 @@ class PmuAttr: self.c_pmu_attr.numEvt = ctypes.c_uint(0) @property - def numPid(self) -> int: + def numPid(self): return self.c_pmu_attr.numPid @property - def pidList(self) -> List[int]: + def 
pidList(self): return [self.c_pmu_attr.pidList[i] for i in range(self.numPid)] @pidList.setter - def pidList(self, pidList: List[int]) -> None: + def pidList(self, pidList): if pidList: numPid = len(pidList) self.c_pmu_attr.pidList = (ctypes.c_int * numPid)(*[pid for pid in pidList]) @@ -263,11 +263,11 @@ class PmuAttr: self.c_pmu_attr.numPid = ctypes.c_uint(0) @property - def evtAttr(self) -> List[CtypesEvtAttr]: + def evtAttr(self): return [self.c_pmu_attr.evtAttr[i] for i in range(len(self.c_pmu_attr.evtAttr))] @evtAttr.setter - def evtAttr(self, evtAttr: List[CtypesEvtAttr]) -> None: + def evtAttr(self, evtAttr): if evtAttr: numEvtAttr = len(evtAttr) self.c_pmu_attr.evtAttr = (CtypesEvtAttr * numEvtAttr)(*[CtypesEvtAttr(evt) for evt in evtAttr]) @@ -275,15 +275,15 @@ class PmuAttr: self.c_pmu_attr.evtAttr = None @property - def numCpu(self) -> int: + def numCpu(self): return self.c_pmu_attr.numCpu @property - def cpuList(self) -> List[int]: + def cpuList(self): return [self.c_pmu_attr.cpuList[i] for i in range(self.numCpu)] @cpuList.setter - def cpuList(self, cpuList: List[int]) -> None: + def cpuList(self, cpuList): if cpuList: numCpu = len(cpuList) self.c_pmu_attr.cpuList = (ctypes.c_int * numCpu)(*[cpu for cpu in cpuList]) @@ -293,109 +293,109 @@ class PmuAttr: self.c_pmu_attr.numCpu = ctypes.c_uint(0) @property - def sampleRate(self) -> int: + def sampleRate(self): if not self.useFreq: return self.c_pmu_attr.sampleRate.period else: return self.c_pmu_attr.sampleRate.freq @sampleRate.setter - def sampleRate(self, sampleRate: int) -> None: + def sampleRate(self, sampleRate): if not self.useFreq: self.c_pmu_attr.sampleRate.period = ctypes.c_uint(sampleRate) else: self.c_pmu_attr.sampleRate.freq = ctypes.c_uint(sampleRate) @property - def useFreq(self) -> bool: + def useFreq(self): return bool(self.c_pmu_attr.useFreq) @useFreq.setter - def useFreq(self, useFreq: bool) -> None: + def useFreq(self, useFreq): self.c_pmu_attr.useFreq = int(useFreq) @property - def 
excludeUser(self) -> bool: + def excludeUser(self): return bool(self.c_pmu_attr.excludeUser) @excludeUser.setter - def excludeUser(self, excludeUser: bool) -> None: + def excludeUser(self, excludeUser): self.c_pmu_attr.excludeUser = int(excludeUser) @property - def excludeKernel(self) -> bool: + def excludeKernel(self): return bool(self.c_pmu_attr.excludeKernel) @excludeKernel.setter - def excludeKernel(self, excludeKernel: bool) -> None: + def excludeKernel(self, excludeKernel): self.c_pmu_attr.excludeKernel = int(excludeKernel) @property - def symbolMode(self) -> int: + def symbolMode(self): return self.c_pmu_attr.symbolMode @symbolMode.setter - def symbolMode(self, symbolMode: int) -> None: + def symbolMode(self, symbolMode): self.c_pmu_attr.symbolMode = ctypes.c_uint(symbolMode) @property - def callStack(self) -> bool: + def callStack(self): return bool(self.c_pmu_attr.callStack) @callStack.setter - def callStack(self, callStack: bool) -> None: + def callStack(self, callStack): self.c_pmu_attr.callStack = int(callStack) @property - def blockedSample(self) -> bool: + def blockedSample(self): return bool(self.c_pmu_attr.blockedSample) @blockedSample.setter - def blockedSample(self, blockedSample: bool) -> None: + def blockedSample(self, blockedSample): self.c_pmu_attr.blockedSample = int(blockedSample) @property - def dataFilter(self) -> int: + def dataFilter(self): return self.c_pmu_attr.dataFilter @dataFilter.setter - def dataFilter(self, dataFilter: int) -> None: + def dataFilter(self, dataFilter): self.c_pmu_attr.dataFilter = ctypes.c_uint64(dataFilter) @property - def evFilter(self) -> int: + def evFilter(self): return self.c_pmu_attr.evFilter @evFilter.setter - def evFilter(self, evFilter: int) -> None: + def evFilter(self, evFilter): self.c_pmu_attr.evFilter = ctypes.c_uint(evFilter) @property - def minLatency(self) -> int: + def minLatency(self): return self.c_pmu_attr.minLatency @minLatency.setter - def minLatency(self, minLatency: int) -> None: + def 
minLatency(self, minLatency): self.c_pmu_attr.minLatency = ctypes.c_ulong(minLatency) @property - def includeNewFork(self) -> bool: + def includeNewFork(self): return bool(self.c_pmu_attr.includeNewFork) @includeNewFork.setter - def includeNewFork(self, includeNewFork: bool) -> None: + def includeNewFork(self, includeNewFork): self.c_pmu_attr.includeNewFork = int(includeNewFork) @property - def branchSampleFilter(self) -> int: + def branchSampleFilter(self): return self.c_pmu_attr.branchSampleFilter @branchSampleFilter.setter - def branchSampleFilter(self, branchSampleFilter: int) -> None: + def branchSampleFilter(self, branchSampleFilter): self.c_pmu_attr.branchSampleFilter = ctypes.c_ulong(branchSampleFilter) @classmethod - def from_c_pmu_data(cls, c_pmu_attr: CtypesPmuAttr) -> 'PmuAttr': + def from_c_pmu_data(cls, c_pmu_attr): pmu_attr = cls() pmu_attr.__c_pmu_attr = c_pmu_attr return pmu_attr @@ -413,10 +413,10 @@ class CtypesPmuDeviceAttr(ctypes.Structure): ] def __init__(self, - metric: int = 0, - bdf: str = None, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + metric=0, + bdf= None, + *args, **kw): + super(CtypesPmuDeviceAttr, self).__init__(*args, **kw) self.metric = ctypes.c_int(metric) if bdf: @@ -425,44 +425,44 @@ class CtypesPmuDeviceAttr(ctypes.Structure): self.bdf = None -class PmuDeviceAttr: +class PmuDeviceAttr(object): __slots__ = ['__c_pmu_device_attr'] def __init__(self, - metric: int = 0, - bdf: str = None) -> None: + metric=0, + bdf= None): self.__c_pmu_device_attr = CtypesPmuDeviceAttr( metric=metric, bdf=bdf ) @property - def c_pmu_device_attr(self) -> CtypesPmuDeviceAttr: + def c_pmu_device_attr(self): return self.__c_pmu_device_attr @property - def metric(self) -> int: + def metric(self): return self.c_pmu_device_attr.metric @metric.setter - def metric(self, metric: int) -> None: + def metric(self, metric): self.c_pmu_device_attr.metric = ctypes.c_int(metric) @property - def bdf(self) -> str: + def bdf(self): if 
self.c_pmu_device_attr.bdf: return self.c_pmu_device_attr.bdf.decode(UTF_8) return None @bdf.setter - def bdf(self, bdf: str) -> None: + def bdf(self, bdf): if bdf: self.c_pmu_device_attr.bdf = ctypes.c_char_p(bdf.encode(UTF_8)) else: self.c_pmu_device_attr.bdf = None @classmethod - def from_c_pmu_device_attr(cls, c_pmu_device_attr: CtypesPmuDeviceAttr) -> 'PmuDeviceAttr': + def from_c_pmu_device_attr(cls, c_pmu_device_attr): pmu_device_attr = cls() pmu_device_attr.__c_pmu_device_attr = c_pmu_device_attr return pmu_device_attr @@ -510,43 +510,43 @@ class CtypesPmuDeviceData(ctypes.Structure): ] @property - def coreId(self) -> int: + def coreId(self): if self.mode == 1: # PMU_METRIC_CORE return self._union.coreId return 0 @property - def numaId(self) -> int: + def numaId(self): if self.mode == 2: # PMU_METRIC_NUMA return self._union.numaId return 0 @property - def clusterId(self) -> int: + def clusterId(self): if self.mode == 3: # PMU_METRIC_CLUSTER return self._union.clusterId return 0 @property - def bdf(self) -> str: + def bdf(self): if self.mode == 4 and self._union.bdf: # PMU_METRIC_BDF return self._union.bdf.decode(UTF_8) return "" @property - def channelId(self) -> int: + def channelId(self): if self.mode == 5 and self._union._structure.channelId: # PMU_METRIC_CHANNEL return self._union._structure.channelId return 0 @property - def ddrNumaId(self) -> int: + def ddrNumaId(self): if self.mode == 5 and self._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL return self._union._structure.ddrNumaId return 0 @property - def socketId(self) -> int: + def socketId(self): if self.mode == 5 and self._union._structure.socketId: # PMU_METRIC_CHANNEL return self._union._structure.socketId return 0 @@ -555,74 +555,74 @@ class ImplPmuDeviceData: __slots__ = ['__c_pmu_device_data'] def __init__(self, - metric: int = 0, - count: float = 0, - mode: int = 0) -> None: + metric=0, + count=0, + mode=0): self.__c_pmu_device_data = CtypesPmuDeviceData() self.__c_pmu_device_data.metric 
= ctypes.c_int(metric) self.__c_pmu_device_data.count = ctypes.c_double(count) self.__c_pmu_device_data.mode = ctypes.c_int(mode) @property - def c_pmu_device_data(self) -> CtypesPmuDeviceData: + def c_pmu_device_data(self): return self.__c_pmu_device_data @property - def metric(self) -> int: + def metric(self): return self.c_pmu_device_data.metric @property - def count(self) -> float: + def count(self): return self.c_pmu_device_data.count @property - def mode(self) -> int: + def mode(self): return self.c_pmu_device_data.mode @property - def coreId(self) -> int: + def coreId(self): if self.mode == 1: # PMU_METRIC_CORE return self.c_pmu_device_data._union.coreId return 0 @property - def numaId(self) -> int: + def numaId(self): if self.mode == 2: # PMU_METRIC_NUMA return self.c_pmu_device_data._union.numaId return 0 @property - def clusterId(self) -> int: + def clusterId(self): if self.mode == 3: # PMU_METRIC_CLUSTER return self.c_pmu_device_data._union.clusterId return 0 @property - def bdf(self) -> str: + def bdf(self): if self.mode == 4 and self.c_pmu_device_data._union.bdf: # PMU_METRIC_BDF return self.c_pmu_device_data._union.bdf.decode(UTF_8) return "" @property - def channelId(self) -> int: + def channelId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.channelId: # PMU_METRIC_CHANNEL return self.c_pmu_device_data._union._structure.channelId return 0 @property - def ddrNumaId(self) -> int: + def ddrNumaId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL return self.c_pmu_device_data._union._structure.ddrNumaId return 0 @property - def socketId(self) -> int: + def socketId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.socketId: # PMU_METRIC_CHANNEL return self.c_pmu_device_data._union._structure.socketId return 0 @classmethod - def from_c_pmu_device_data(cls, c_pmu_device_data: CtypesPmuDeviceData) -> 'ImplPmuDeviceData': + def from_c_pmu_device_data(cls, 
c_pmu_device_data): pmu_device_data = cls() pmu_device_data.__c_pmu_device_data = c_pmu_device_data return pmu_device_data @@ -631,15 +631,15 @@ class ImplPmuDeviceData: class PmuDeviceData: __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CtypesPmuDeviceData) = None, len: int = 0) -> None: - self.__pointer = pointer - self.__len = len + def __init__(self, pointer = None, len=0): + self.__pointer =pointer + self.__len =len self.__iter = (ImplPmuDeviceData.from_c_pmu_device_data(self.__pointer[i]) for i in range(self.__len)) - def __del__(self) -> None: + def __del__(self): self.free() - def __len__(self) -> int: + def __len__(self): return self.__len def __getitem__(self, index): @@ -648,14 +648,14 @@ class PmuDeviceData: return ImplPmuDeviceData.from_c_pmu_device_data(self.__pointer[index]) @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuDeviceData]: + def iter(self): return self.__iter - def free(self) -> None: + def free(self): if self.__pointer is not None: DevDataFree(self.__pointer) self.__pointer = None @@ -684,11 +684,11 @@ class CtypesPmuTraceAttr(ctypes.Structure): ] def __init__(self, - funcs: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None, - *args: Any, **kw:Any) -> None: - super().__init__(*args, **kw) + funcs=None, + pidList=None, + cpuList=None, + *args, **kw): + super(CtypesPmuTraceAttr, self).__init__(*args, **kw) if funcs: numFuncs = len(funcs) @@ -715,13 +715,13 @@ class CtypesPmuTraceAttr(ctypes.Structure): self.numCpu = ctypes.c_uint(0) -class PmuTraceAttr: +class PmuTraceAttr(object): __slots__ = ['__c_pmu_trace_attr'] def __init__(self, - funcs: List[str]=None, - pidList: List[int]=None, - cpuList: List[int]=None) -> None: + funcs=None, + pidList=None, + cpuList=None): self.__c_pmu_trace_attr = CtypesPmuTraceAttr( funcs=funcs, pidList=pidList, @@ -729,19 +729,19 @@ class PmuTraceAttr: ) @property - def 
c_pmu_trace_attr(self) -> CtypesPmuTraceAttr: + def c_pmu_trace_attr(self): return self.__c_pmu_trace_attr @property - def numFuncs(self) -> int: + def numFuncs(self): return self.c_pmu_trace_attr.numFuncs @property - def funcs(self) -> List[str]: + def funcs(self): return [self.c_pmu_trace_attr.funcs[i].decode(UTF_8) for i in range(self.numFuncs)] @funcs.setter - def funcs(self, funcs: List[str]) -> None: + def funcs(self, funcs): if funcs: numFuncs = len(funcs) self.c_pmu_trace_attr.funcs = (ctypes.c_char_p * numFuncs)(*[func.encode(UTF_8) for func in funcs]) @@ -751,15 +751,15 @@ class PmuTraceAttr: self.c_pmu_trace_attr.numFuncs = ctypes.c_uint(0) @property - def numPid(self) -> int: + def numPid(self): return self.c_pmu_trace_attr.numPid @property - def pidList(self) -> List[int]: + def pidList(self): return [self.c_pmu_trace_attr.pidList[i] for i in range(self.numPid)] @pidList.setter - def pidList(self, pidList: List[int]) -> None: + def pidList(self, pidList): if pidList: numPid = len(pidList) self.c_pmu_trace_attr.pidList = (ctypes.c_int * numPid)(*[pid for pid in pidList]) @@ -769,15 +769,15 @@ class PmuTraceAttr: self.c_pmu_trace_attr.numPid = ctypes.c_uint(0) @property - def numCpu(self) -> int: + def numCpu(self): return self.c_pmu_trace_attr.numCpu @property - def cpuList(self) -> List[int]: + def cpuList(self): return [self.c_pmu_trace_attr.cpuList[i] for i in range(self.numCpu)] @cpuList.setter - def cpuList(self, cpuList: List[int]) -> None: + def cpuList(self, cpuList): if cpuList: numCpu = len(cpuList) self.c_pmu_trace_attr.cpuList = (ctypes.c_int * numCpu)(*[cpu for cpu in cpuList]) @@ -802,11 +802,11 @@ class CtypesCpuTopology(ctypes.Structure): ] def __init__(self, - coreId: int=0, - numaId: int=0, - socketId: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + coreId=0, + numaId=0, + socketId=0, + *args, **kw): + super(CtypesCpuTopology, self).__init__(*args, **kw) self.coreId = ctypes.c_int(coreId) self.numaId = 
ctypes.c_int(numaId) self.socketId = ctypes.c_int(socketId) @@ -816,9 +816,9 @@ class CpuTopology: __slots__ = ['__c_cpu_topo'] def __init__(self, - coreId: int=0, - numaId: int=0, - socketId: int=0) -> None: + coreId=0, + numaId=0, + socketId=0): self.__c_cpu_topo = CtypesCpuTopology( coreId=coreId, numaId=numaId, @@ -826,35 +826,35 @@ class CpuTopology: ) @property - def c_cpu_topo(self) -> CtypesCpuTopology: + def c_cpu_topo(self): return self.__c_cpu_topo @property - def coreId(self) -> int: + def coreId(self): return self.c_cpu_topo.coreId @coreId.setter - def coreId(self, coreId: int) -> None: + def coreId(self, coreId): self.c_cpu_topo.coreId = ctypes.c_int(coreId) @property - def numaId(self) -> int: + def numaId(self): return self.c_cpu_topo.numaId @numaId.setter - def numaId(self, numaId: int) -> None: + def numaId(self, numaId): self.c_cpu_topo.numaId = ctypes.c_int(numaId) @property - def socketId(self) -> int: + def socketId(self): return self.c_cpu_topo.socketId @socketId.setter - def socketId(self, socketId: int) -> None: + def socketId(self, socketId): self.c_cpu_topo.socketId = ctypes.c_int(socketId) @classmethod - def from_c_cpu_topo(cls, c_cpu_topo: CtypesCpuTopology) -> 'CpuTopology': + def from_c_cpu_topo(cls, c_cpu_topo): cpu_topo = cls() cpu_topo.__c_cpu_topo = c_cpu_topo return cpu_topo @@ -865,27 +865,27 @@ class CtypesSampleRawData(ctypes.Structure): ('data', ctypes.c_char_p) ] - def __init__(self, data: str='', *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + def __init__(self, data='', *args, **kw): + super(CtypesSampleRawData, self).__init__(*args, **kw) self.data = ctypes.c_char_p(data.encode(UTF_8)) class SampleRawData: __slots__ = ['__c_sample_rawdata'] - def __init__(self, data: str='') -> None: + def __init__(self, data=''): self.__c_sample_rawdata = CtypesSampleRawData(data) @property - def c_pmu_data_rawData(self) -> CtypesSampleRawData: + def c_pmu_data_rawData(self): return self.__c_sample_rawdata @property - 
def data(self) -> str: + def data(self): return self.__c_sample_rawdata.data.decode(UTF_8) @classmethod - def from_sample_raw_data(cls, c_sample_raw_data: CtypesSampleRawData) -> 'SampleRawData': + def from_sample_raw_data(cls, c_sample_raw_data): sample_raw_data = cls() sample_raw_data.__c_sample_rawdata = c_sample_raw_data return sample_raw_data @@ -910,9 +910,9 @@ class ImplBranchRecords(): __slots__ = ['__c_branch_record'] def __init__(self, - fromAddr: int=0, - toAddr: int=0, - cycles: int=0) -> None: + fromAddr=0, + toAddr=0, + cycles=0): self.__c_branch_record = CytpesBranchSampleRecord( fromAddr=fromAddr, toAddr=toAddr, @@ -920,23 +920,23 @@ class ImplBranchRecords(): ) @property - def c_branch_record(self) -> CytpesBranchSampleRecord: + def c_branch_record(self): return self.__c_branch_record @property - def fromAddr(self) -> int: + def fromAddr(self): return self.c_branch_record.fromAddr @property - def toAddr(self) -> int: + def toAddr(self): return self.c_branch_record.toAddr @property - def cycles(self) -> int: + def cycles(self): return self.c_branch_record.cycles @classmethod - def from_c_branch_record(cls, c_branch_record: CytpesBranchSampleRecord) -> 'ImplBranchRecords': + def from_c_branch_record(cls, c_branch_record): branch_record = cls() branch_record.__c_branch_record = c_branch_record return branch_record @@ -945,17 +945,17 @@ class ImplBranchRecords(): class BranchRecords(): __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CytpesBranchSampleRecord) = None, nr: int=0) -> None: + def __init__(self, pointer = None, nr=0): self.__pointer = pointer self.__len = nr self.__iter = (ImplBranchRecords.from_c_branch_record(self.__pointer[i]) for i in range(self.__len)) @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplBranchRecords]: + def iter(self): return self.__iter class CytpesSpeDataExt(ctypes.Structure): @@ -966,12 +966,12 @@ class 
CytpesSpeDataExt(ctypes.Structure): ('lat', ctypes.c_ushort), ] def __init__(self, - pa: int=0, - va: int=0, - event: int=0, - lat: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + pa=0, + va=0, + event=0, + lat=0, + *args, **kw): + super(CytpesSpeDataExt, self).__init__(*args, **kw) self.pa = ctypes.c_ulong(pa) self.va = ctypes.c_ulong(va) self.event = ctypes.c_ulong(event) @@ -1000,33 +1000,33 @@ class PmuDataExt: __slots__ = ['__c_pmu_data_ext'] @property - def c_pmu_data_ext(self) -> CtypesPmuDataExt: + def c_pmu_data_ext(self): return self.__c_pmu_data_ext @property - def pa(self) -> int: + def pa(self): return self.c_pmu_data_ext.ext.speDataExt.pa @property - def va(self) -> int: + def va(self): return self.c_pmu_data_ext.ext.speDataExt.va @property - def event(self) -> int: + def event(self): return self.c_pmu_data_ext.ext.speDataExt.event @property - def lat(self) -> int: + def lat(self): return self.c_pmu_data_ext.ext.speDataExt.lat @property - def branchRecords(self) -> BranchRecords: + def branchRecords(self): if self.__c_pmu_data_ext.ext.branchRecords.branchRecords: return BranchRecords(self.__c_pmu_data_ext.ext.branchRecords.branchRecords, self.__c_pmu_data_ext.ext.branchRecords.nr) return None @classmethod - def from_pmu_data_ext(cls, c_pmu_data_ext: CtypesPmuDataExt) -> 'PmuDataExt': + def from_pmu_data_ext(cls, c_pmu_data_ext): pmu_data_ext = cls() pmu_data_ext.__c_pmu_data_ext = c_pmu_data_ext return pmu_data_ext @@ -1051,13 +1051,13 @@ class CtypesSampleRawField(ctypes.Structure): ] def __init__(self, - field_name: str='', - field_str: str='', - offset: int=0, - size: int=0, - is_signed: int=0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + field_name='', + field_str='', + offset=0, + size=0, + is_signed=0, + *args, **kw): + super(CtypesSampleRawField, self).__init__(*args, **kw) self.fieldName = ctypes.c_char_p(field_name.encode(UTF_8)) self.fieldStr = ctypes.c_char_p(field_str.encode(UTF_8)) 
self.offset = ctypes.c_uint(offset) @@ -1069,39 +1069,39 @@ class SampleRawField: __slots__ = ['__c_sample_raw_field'] def __init__(self, - field_name: str='', - field_str: str='', - offset: int=0, - size: int=0, - is_signed: int=0) -> None: + field_name='', + field_str='', + offset=0, + size=0, + is_signed=0): self.__c_sample_raw_field = CtypesSampleRawField(field_name, field_str, offset, size, is_signed) @property - def c_sample_raw_field(self) -> CtypesSampleRawField: + def c_sample_raw_field(self): return self.__c_sample_raw_field @property - def field_name(self) -> str: + def field_name(self): return self.__c_sample_raw_field.fieldName.decode(UTF_8) @property - def field_str(self) -> str: + def field_str(self): return self.__c_sample_raw_field.fieldStr.decode(UTF_8) @property - def size(self) -> int: + def size(self): return self.__c_sample_raw_field.size @property - def offset(self) -> int: + def offset(self): return self.__c_sample_raw_field.offset @property - def is_signed(self) -> bool: + def is_signed(self): return bool(self.__c_sample_raw_field.isSigned) @classmethod - def from_sample_raw_field(cls, __c_sample_raw_field: CtypesSampleRawField): + def from_sample_raw_field(cls, __c_sample_raw_field): sample_raw_data = cls() sample_raw_data.__c_sample_raw_field = __c_sample_raw_field return sample_raw_data @@ -1144,22 +1144,22 @@ class CtypesPmuData(ctypes.Structure): ] def __init__(self, - stack: CtypesStack=None, - evt: str='', - ts: int=0, - pid: int=0, - tid: int=0, - cpu: int=0, - groupId: int=0, - cpuTopo: CtypesCpuTopology=None, - comm: str='', - period: int=0, - count: int=0, - countPercent: float=0.0, - ext: CtypesPmuDataExt=None, - rawData: CtypesSampleRawData=None, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + stack=None, + evt='', + ts=0, + pid=0, + tid=0, + cpu=0, + groupId=0, + cpuTopo=None, + comm='', + period=0, + count=0, + countPercent=0.0, + ext=None, + rawData=None, + *args, **kw): + super(CtypesPmuData, 
self).__init__(*args, **kw) self.stack = stack self.evt = ctypes.c_char_p(evt.encode(UTF_8)) @@ -1181,20 +1181,20 @@ class ImplPmuData: __slots__ = ['__c_pmu_data'] def __init__(self, - stack: Stack=None, - evt: str='', - ts: int=0, - pid: int=0, - tid: int=0, - cpu: int=0, - groupId: int=0, - cpuTopo: CpuTopology=None, - comm: str='', - period: int=0, - count: int=0, - countPercent: float=0.0, - ext: PmuDataExt=None, - rawData: SampleRawData=None) -> None: + stack=None, + evt='', + ts=0, + pid=0, + tid=0, + cpu=0, + groupId=0, + cpuTopo=None, + comm='', + period=0, + count=0, + countPercent=0.0, + ext=None, + rawData=None): self.__c_pmu_data = CtypesPmuData( stack=stack.c_stack if stack else None, evt=evt, @@ -1213,119 +1213,119 @@ class ImplPmuData: ) @property - def c_pmu_data(self) -> CtypesPmuData: + def c_pmu_data(self): return self.__c_pmu_data @property - def stack(self) -> Stack: + def stack(self): return Stack.from_c_stack(self.c_pmu_data.stack.contents) if self.c_pmu_data.stack else None @stack.setter - def stack(self, stack: Stack) -> None: + def stack(self, stack): self.c_pmu_data.stack = stack.c_stack if stack else None @property - def evt(self) -> str: + def evt(self): return self.c_pmu_data.evt.decode(UTF_8) @evt.setter - def evt(self, evt: str) -> None: + def evt(self, evt): self.c_pmu_data.evt = ctypes.c_char_p(evt.encode(UTF_8)) @property - def ts(self) -> int: + def ts(self): return self.c_pmu_data.ts @ts.setter - def ts(self, ts: int) -> None: + def ts(self, ts): self.c_pmu_data.ts = ctypes.c_int64(ts) @property - def pid(self) -> int: + def pid(self): return self.c_pmu_data.pid @pid.setter - def pid(self, pid: int) -> None: + def pid(self, pid): self.c_pmu_data.pid = ctypes.c_int(pid) @property - def tid(self) -> int: + def tid(self): return self.c_pmu_data.tid @tid.setter - def tid(self, tid: int) -> None: + def tid(self, tid): self.c_pmu_data.tid = ctypes.c_int(tid) @property - def cpu(self) -> int: + def cpu(self): return 
self.c_pmu_data.cpu @cpu.setter - def cpu(self, cpu: int) -> None: + def cpu(self, cpu): self.c_pmu_data.cpu = ctypes.c_int(cpu) @property - def groupId(self) -> int: + def groupId(self): return self.c_pmu_data.groupId @groupId.setter - def groupId(self, groupId: int) -> None: + def groupId(self, groupId): self.c_pmu_data.groupId = ctypes.c_int(groupId) @property - def cpuTopo(self) -> CpuTopology: + def cpuTopo(self): return CpuTopology.from_c_cpu_topo(self.c_pmu_data.cpuTopo.contents) if self.c_pmu_data.cpuTopo else None @cpuTopo.setter - def cpuTopo(self, cpuTopo: CpuTopology) -> None: + def cpuTopo(self, cpuTopo): self.c_pmu_data.cpuTopo = cpuTopo.c_cpu_topo if cpuTopo else None @property - def comm(self) -> str: + def comm(self): return self.c_pmu_data.comm.decode(UTF_8) @comm.setter - def comm(self, comm: str) -> None: + def comm(self, comm): self.c_pmu_data.comm = ctypes.c_char_p(comm.encode(UTF_8)) @property - def period(self) -> int: + def period(self): return self.c_pmu_data.period @period.setter - def period(self, period: int) -> None: + def period(self, period): self.c_pmu_data.period = ctypes.c_uint64(period) @property - def count(self) -> int: + def count(self): return self.c_pmu_data.count @count.setter - def count(self, count: int) -> None: + def count(self, count): self.c_pmu_data.count = ctypes.c_uint64(count) @property - def countPercent(self) -> float: + def countPercent(self): return self.c_pmu_data.countPercent @countPercent.setter - def countPercent(self, countPercent: float) -> None: + def countPercent(self, countPercent): self.c_pmu_data.countPercent = ctypes.c_double(countPercent) @property - def ext(self) -> PmuDataExt: + def ext(self): return PmuDataExt.from_pmu_data_ext(self.c_pmu_data.ext.contents) if self.c_pmu_data.ext else None @property - def rawData(self) -> SampleRawData: + def rawData(self): return SampleRawData.from_sample_raw_data(self.c_pmu_data.rawData) if self.c_pmu_data.rawData else None @ext.setter - def ext(self, ext: 
PmuDataExt) -> None: + def ext(self, ext): self.c_pmu_data.ext = ext.c_pmu_data_ext if ext else None @classmethod - def from_c_pmu_data(cls, c_pmu_data: CtypesPmuData) -> 'ImplPmuData': + def from_c_pmu_data(cls, c_pmu_data): pmu_data = cls() pmu_data.__c_pmu_data = c_pmu_data return pmu_data @@ -1334,32 +1334,32 @@ class ImplPmuData: class PmuData: __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CtypesPmuData) = None, len: int = 0) -> None: + def __init__(self, pointer = None, len=0): self.__pointer = pointer self.__len = len self.__iter = (ImplPmuData.from_c_pmu_data(self.__pointer[i]) for i in range(self.__len)) - def __del__(self) -> None: + def __del__(self): self.free() - def __len__(self) -> int: + def __len__(self): return self.__len def __iter__(self): return self.__iter - def pointer(self) -> ctypes.POINTER(CtypesPmuData): + def pointer(self): return self.__pointer @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuData]: + def iter(self): return self.__iter - def free(self) -> None: + def free(self): if self.__pointer is not None: PmuDataFree(self.__pointer) self.__pointer = None @@ -1387,15 +1387,15 @@ class CtypesPmuTraceData(ctypes.Structure): ] def __init__(self, - funcs: str = '', - startTs: int = 0, - elapsedTime: float = 0.0, - pid: int = 0, - tid: int = 0, - cpu: int = 0, - comm: str = '', - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + funcs= '', + startTs=0, + elapsedTime=0.0, + pid=0, + tid=0, + cpu=0, + comm= '', + *args, **kw): + super(CtypesPmuTraceData, self).__init__(*args, **kw) self.funcs = ctypes.c_char_p(funcs.encode(UTF_8)) self.startTs = ctypes.c_int64(startTs) @@ -1408,14 +1408,14 @@ class CtypesPmuTraceData(ctypes.Structure): class ImplPmuTraceData: __slots__ = ['__c_pmu_trace_data'] def __init__(self, - funcs: str = '', - startTs: int = 0, - elapsedTime: float = 0.0, - pid: int = 0, - tid: int = 0, - cpu: 
int = 0, - comm: str = '', - *args: Any, **kw: Any) -> None: + funcs= '', + startTs=0, + elapsedTime=0.0, + pid=0, + tid=0, + cpu=0, + comm= '', + *args, **kw): self.__c_pmu_trace_data = CtypesPmuTraceData( funcs=funcs, startTs=startTs, @@ -1427,67 +1427,67 @@ class ImplPmuTraceData: ) @property - def c_pmu_trace_data(self) -> CtypesPmuTraceData: + def c_pmu_trace_data(self): return self.__c_pmu_trace_data @property - def funcs(self) -> str: + def funcs(self): return self.__c_pmu_trace_data.funcs.decode(UTF_8) @funcs.setter - def funcs(self, funcs: str) -> None: + def funcs(self, funcs): self.__c_pmu_trace_data.funcs = ctypes.c_char_p(funcs.encode(UTF_8)) @property - def startTs(self) -> int: + def startTs(self): return self.__c_pmu_trace_data.startTs @startTs.setter - def startTs(self, startTs: int) -> None: + def startTs(self, startTs): self.__c_pmu_trace_data.startTs = ctypes.c_int64(startTs) @property - def elapsedTime(self) -> float: + def elapsedTime(self): return self.__c_pmu_trace_data.elapsedTime @elapsedTime.setter - def elapsedTime(self, elapsedTime: float) -> None: + def elapsedTime(self, elapsedTime): self.__c_pmu_trace_data.elapsedTime = ctypes.c_double(elapsedTime) @property - def pid(self) -> int: + def pid(self): return self.__c_pmu_trace_data.pid @pid.setter - def pid(self, pid: int) -> None: + def pid(self, pid): self.__c_pmu_trace_data.pid = ctypes.c_int(pid) @property - def tid(self) -> int: + def tid(self): return self.__c_pmu_trace_data.tid @tid.setter - def tid(self, tid: int) -> None: + def tid(self, tid): self.__c_pmu_trace_data.tid = ctypes.c_int(tid) @property - def cpu(self) -> int: + def cpu(self): return self.__c_pmu_trace_data.cpu @cpu.setter - def cpu(self, cpu: int) -> None: + def cpu(self, cpu): self.__c_pmu_trace_data.cpu = ctypes.c_int(cpu) @property - def comm(self) -> str: + def comm(self): return self.__c_pmu_trace_data.comm.decode(UTF_8) @comm.setter - def comm(self, comm: str) -> None: + def comm(self, comm): 
self.__c_pmu_trace_data.comm = ctypes.c_char_p(comm.encode(UTF_8)) @classmethod - def from_c_pmu_trace_data(cls, c_pmu_trace_data: CtypesPmuTraceData) -> 'ImplPmuTraceData': + def from_c_pmu_trace_data(cls, c_pmu_trace_data): pmu_trace_data = cls() pmu_trace_data.__c_pmu_trace_data = c_pmu_trace_data return pmu_trace_data @@ -1495,28 +1495,28 @@ class ImplPmuTraceData: class PmuTraceData: __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CtypesPmuTraceData) = None, len: int = 0) -> None: + def __init__(self, pointer = None, len=0): self.__pointer = pointer self.__len = len self.__iter = (ImplPmuTraceData.from_c_pmu_trace_data(self.__pointer[i]) for i in range(self.__len)) - def __del__(self) -> None: + def __del__(self): self.free() @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuTraceData]: + def iter(self): return self.__iter - def free(self) -> None: + def free(self): if self.__pointer is not None: PmuTraceDataFree(self.__pointer) self.__pointer = None -def PmuOpen(collectType: int, pmuAttr: PmuAttr) -> int: +def PmuOpen(collectType, pmuAttr): """ int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr); """ @@ -1529,7 +1529,7 @@ def PmuOpen(collectType: int, pmuAttr: PmuAttr) -> int: return c_PmuOpen(c_collectType, ctypes.byref(pmuAttr.c_pmu_attr)) -def PmuEventListFree() -> None: +def PmuEventListFree(): """ void PmuEventListFree(); """ @@ -1540,7 +1540,7 @@ def PmuEventListFree() -> None: c_PmuEventListFree() -def PmuEventList(eventType: int) -> Iterator[str]: +def PmuEventList(eventType): """ const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt); """ @@ -1555,7 +1555,7 @@ def PmuEventList(eventType: int) -> Iterator[str]: return (eventList[i].decode(UTF_8) for i in range(c_numEvt.value)) -def PmuEnable(pd: int) -> int: +def PmuEnable(pd): """ int PmuEnable(int pd); """ @@ -1568,7 +1568,7 @@ def PmuEnable(pd: int) -> int: return 
c_PmuEnable(c_pd) -def PmuDisable(pd: int) -> int: +def PmuDisable(pd): """ int PmuDisable(int pd); """ @@ -1581,7 +1581,7 @@ def PmuDisable(pd: int) -> int: return c_PmuDisable(c_pd) -def PmuCollect(pd: int, milliseconds: int, interval: int) -> int: +def PmuCollect(pd, milliseconds, interval): """ int PmuCollect(int pd, int milliseconds, unsigned interval); """ @@ -1596,7 +1596,7 @@ def PmuCollect(pd: int, milliseconds: int, interval: int) -> int: return c_PmuCollect(c_pd, c_milliseconds, c_interval) -def PmuStop(pd: int) -> None: +def PmuStop(pd): """ void PmuStop(int pd); """ @@ -1609,7 +1609,7 @@ def PmuStop(pd: int) -> None: c_PmuStop(c_pd) -def PmuDataFree(pmuData: ctypes.POINTER(CtypesPmuData)) -> None: +def PmuDataFree(pmuData): """ void PmuDataFree(struct PmuData* pmuData); """ @@ -1619,7 +1619,7 @@ def PmuDataFree(pmuData: ctypes.POINTER(CtypesPmuData)) -> None: c_PmuDataFree(pmuData) -def PmuRead(pd: int) -> PmuData: +def PmuRead(pd): """ int PmuRead(int pd, struct PmuData** pmuData); """ @@ -1633,7 +1633,7 @@ def PmuRead(pd: int) -> PmuData: c_data_len = c_PmuRead(c_pd, ctypes.byref(c_data_pointer)) return PmuData(c_data_pointer, c_data_len) -def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: +def ResolvePmuDataSymbol(pmuData): """ int ResolvePmuDataSymbol(struct PmuData* pmuData); """ @@ -1643,8 +1643,7 @@ def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: return c_ResolvePmuDataSymbol(pmuData) -def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), - toData: ctypes.POINTER(ctypes.POINTER(CtypesPmuData))) -> int: +def PmuAppendData(fromData, toData): """ int PmuAppendData(struct PmuData *fromData, struct PmuData **toData); """ @@ -1655,7 +1654,7 @@ def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), return c_PmuAppendData(fromData, toData) -def PmuClose(pd: int) -> None: +def PmuClose(pd): """ void PmuClose(int pd); """ @@ -1668,7 +1667,7 @@ def PmuClose(pd: int) -> None: c_PmuClose(c_pd) -def 
PmuDumpData(pmuData: PmuData, filepath: str, dumpDwf: int) -> None: +def PmuDumpData(pmuData, filepath, dumpDwf): """ int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpDwf); """ @@ -1683,8 +1682,7 @@ def PmuDumpData(pmuData: PmuData, filepath: str, dumpDwf: int) -> None: c_PmuDumpData(pmuData.pointer(), c_len, c_filepath, c_dumpDwf) -def PmuGetField(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str, value: ctypes.c_void_p, - vSize: int) -> int: +def PmuGetField(rawData, field_name, value, vSize): """ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *value, uint32_t vSize); """ @@ -1695,7 +1693,7 @@ def PmuGetField(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str, v return c_PmuGetField(rawData, field_name.encode(UTF_8), value, vSize) -def PmuGetFieldExp(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str) -> SampleRawField: +def PmuGetFieldExp(rawData, field_name): """ SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName); """ @@ -1708,7 +1706,7 @@ def PmuGetFieldExp(rawData: ctypes.POINTER(CtypesSampleRawData), field_name: str return SampleRawField.from_sample_raw_field(pointer_field.contents) -def PmuDeviceBdfListFree() -> None: +def PmuDeviceBdfListFree(): """ void PmuDeviceBdfListFree() """ @@ -1718,7 +1716,7 @@ def PmuDeviceBdfListFree() -> None: c_PmuDeviceBdfListFree() -def PmuDeviceBdfList(bdf_type: int) -> Iterator[str]: +def PmuDeviceBdfList(bdf_type): """ const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf); """ @@ -1731,10 +1729,10 @@ def PmuDeviceBdfList(bdf_type: int) -> Iterator[str]: c_bdf_list = c_PmuDeviceBdfList(c_bdf_type, ctypes.byref(c_num_bdf)) - return [c_bdf_list[i].decode(UTF_8) for i in range(c_num_bdf.value)] + return [c_bdf_list[i].decode(UTF_8) for i in range(c_num_bdf.value)] -def PmuDeviceOpen(device_attr: List[PmuDeviceAttr]) -> int: +def PmuDeviceOpen(device_attr): """ int PmuDeviceOpen(struct
PmuDeviceAttr *deviceAttr, unsigned len); """ @@ -1746,7 +1744,7 @@ def PmuDeviceOpen(device_attr: List[PmuDeviceAttr]) -> int: return c_PmuDeviceOpen(c_device_attr, c_num_device) -def PmuGetDevMetric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> PmuDeviceData: +def PmuGetDevMetric(pmu_data, device_attr): """ int PmuGetDevMetric(struct PmuData *pmuData, unsigned pmuLen, struct PmuDeviceAttr *deviceAttr, unsigned len, struct PmuDeviceData *devicedata); @@ -1764,12 +1762,12 @@ def PmuGetDevMetric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> PmuD c_device_data = ctypes.POINTER(CtypesPmuDeviceData)() res = c_PmuGetDevMetric(pmu_data.pointer(), len(pmu_data), c_device_attr, num_device, ctypes.byref(c_device_data)) - if res <= 0: + if res <=0: return PmuDeviceData() return PmuDeviceData(c_device_data, res) -def DevDataFree(dev_data: ctypes.POINTER(CtypesPmuDeviceData)) -> None: +def DevDataFree(dev_data): """ void DevDataFree(struct PmuDeviceData *devData); """ @@ -1779,7 +1777,7 @@ def DevDataFree(dev_data: ctypes.POINTER(CtypesPmuDeviceData)) -> None: c_DevDataFree(dev_data) -def PmuGetCpuFreq(core: int) -> int: +def PmuGetCpuFreq(core): """ Get CPU frequency of a specific CPU core. @@ -1795,7 +1793,7 @@ def PmuGetCpuFreq(core: int) -> int: c_PmuGetCpuFreq.restype = ctypes.c_longlong return c_PmuGetCpuFreq(core) -def PmuGetClusterCore(clusterId: int) -> List[int]: +def PmuGetClusterCore(clusterId): """ Get CPU core list of a specific cluster. @@ -1814,7 +1812,7 @@ def PmuGetClusterCore(clusterId: int) -> List[int]: return [c_core_list[i] for i in range(c_num_core)] -def PmuGetNumaCore(numaId: int) -> List[int]: +def PmuGetNumaCore(numaId): """ Get CPU core list of a specific NUMA node. 
@@ -1833,7 +1831,7 @@ def PmuGetNumaCore(numaId: int) -> List[int]: return [c_core_list[i] for i in range(c_num_core)] -def PmuTraceOpen(traceType: int, pmuTraceAttr: PmuTraceAttr) -> int: +def PmuTraceOpen(traceType, pmuTraceAttr): """ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr); """ @@ -1845,7 +1843,7 @@ def PmuTraceOpen(traceType: int, pmuTraceAttr: PmuTraceAttr) -> int: return c_PmuTraceOpen(c_traceType, ctypes.byref(pmuTraceAttr.c_pmu_trace_attr)) -def PmuTraceEnable(pd: int) -> int: +def PmuTraceEnable(pd): """ int PmuTraceEnable(int pd); """ @@ -1857,7 +1855,7 @@ def PmuTraceEnable(pd: int) -> int: return c_PmuTraceEnable(c_pd) -def PmuTraceDisable(pd: int) -> int: +def PmuTraceDisable(pd): """ int PmuTraceDisable(int pd); """ @@ -1869,7 +1867,7 @@ def PmuTraceDisable(pd: int) -> int: return c_PmuTraceDisable(c_pd) -def PmuTraceRead(pd: int) -> PmuTraceData: +def PmuTraceRead(pd): """ int PmuTraceRead(int pd, struct PmuTraceData** pmuTraceData); """ @@ -1883,7 +1881,7 @@ def PmuTraceRead(pd: int) -> PmuTraceData: c_data_len = c_PmuTraceRead(c_pd, ctypes.byref(c_data_pointer)) return PmuTraceData(c_data_pointer, c_data_len) -def PmuTraceClose(pd: int) -> None: +def PmuTraceClose(pd): """ void PmuTraceClose(int pd); """ @@ -1895,7 +1893,7 @@ def PmuTraceClose(pd: int) -> None: c_PmuTraceClose(c_pd) -def PmuTraceDataFree(pmuTraceData: ctypes.POINTER(CtypesPmuTraceData)) -> None: +def PmuTraceDataFree(pmuTraceData): """ void PmuTraceDataFree(struct PmuTraceData* pmuTraceData); """ @@ -1904,7 +1902,7 @@ def PmuTraceDataFree(pmuTraceData: ctypes.POINTER(CtypesPmuTraceData)) -> None: c_PmuTraceDataFree.restype = None c_PmuTraceDataFree(pmuTraceData) -def PmuSysCallFuncList() -> Iterator[str]: +def PmuSysCallFuncList(): """ char **PmuSysCallFuncList(unsigned *numFunc); """ @@ -1917,7 +1915,7 @@ def PmuSysCallFuncList() -> Iterator[str]: return (c_func_list[i].decode(UTF_8) for i in range(c_num_func.value)) -def 
PmuSysCallFuncListFree() -> None: +def PmuSysCallFuncListFree(): """ void PmuSysCallFuncListFree(); """ @@ -1944,12 +1942,12 @@ class CtypesPmuCpuFreqDetail(ctypes.Structure): ] def __init__(self, - cpuId: int = 0, - minFreq: int = 0, - maxFreq: int = 0, - avgFreq: int = 0, - *args:Any, **kw: Any) -> None: - super().__init__(*args, **kw) + cpuId=0, + minFreq=0, + maxFreq=0, + avgFreq=0, + *args, **kw): + super(CtypesPmuCpuFreqDetail, self).__init__(*args, **kw) self.cpuId = ctypes.c_int(cpuId) self.minFreq = ctypes.c_uint64(minFreq) self.maxFreq = ctypes.c_uint64(maxFreq) @@ -1959,11 +1957,11 @@ class CtypesPmuCpuFreqDetail(ctypes.Structure): class ImplPmuCpuFreqDetail: __slots__ = ['__c_pmu_cpu_freq_detail'] def __init__(self, - cpuId: int = 0, - minFreq: int = 0, - maxFreq: int = 0, - avgFreq: int = 0, - *args:Any, **kw: Any) -> None: + cpuId=0, + minFreq=0, + maxFreq=0, + avgFreq=0, + *args, **kw): self.__c_pmu_cpu_freq_detail = CtypesPmuCpuFreqDetail( cpuId=cpuId, minFreq=minFreq, @@ -1972,43 +1970,43 @@ class ImplPmuCpuFreqDetail: ) @property - def c_pmu_cpu_freq_detail(self) -> CtypesPmuCpuFreqDetail: + def c_pmu_cpu_freq_detail(self): return self.__c_pmu_cpu_freq_detail @property - def cpuId(self) -> int: + def cpuId(self): return self.__c_pmu_cpu_freq_detail.cpuId @cpuId.setter - def cpuId(self, cpuId: int) -> None: + def cpuId(self, cpuId): self.__c_pmu_cpu_freq_detail.cpuId = ctypes.c_int(cpuId) @property - def minFreq(self) -> int: + def minFreq(self): return self.__c_pmu_cpu_freq_detail.minFreq @minFreq.setter - def minFreq(self, minFreq: int) -> None: + def minFreq(self, minFreq): self.__c_pmu_cpu_freq_detail.minFreq = ctypes.c_uint64(minFreq) @property - def maxFreq(self) -> int: + def maxFreq(self): return self.__c_pmu_cpu_freq_detail.maxFreq @maxFreq.setter - def maxFreq(self, maxFreq: int) -> None: + def maxFreq(self, maxFreq): self.__c_pmu_cpu_freq_detail.maxFreq = ctypes.c_uint64(maxFreq) @property - def avgFreq(self) -> int: + def avgFreq(self): 
return self.__c_pmu_cpu_freq_detail.avgFreq @avgFreq.setter - def avgFreq(self, avgFreq: int) -> None: + def avgFreq(self, avgFreq): self.__c_pmu_cpu_freq_detail.avgFreq = ctypes.c_uint64(avgFreq) @classmethod - def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail: CtypesPmuCpuFreqDetail) -> 'ImplPmuCpuFreqDetail': + def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail): freq_detail = cls() freq_detail.__c_pmu_cpu_freq_detail = c_pmu_cpu_freq_detail return freq_detail @@ -2017,21 +2015,21 @@ class ImplPmuCpuFreqDetail: class PmuCpuFreqDetail: __slots__ = ['__pointer', '__iter', '__len'] - def __init__(self, pointer: ctypes.POINTER(CtypesPmuCpuFreqDetail) = None, len: int = 0) -> None: + def __init__(self, pointer=None, len=0): self.__pointer = pointer self.__len = len self.__iter = (ImplPmuCpuFreqDetail.from_c_pmu_cpu_freq_detail(self.__pointer[i]) for i in range(self.__len)) @property - def len(self) -> int: + def len(self): return self.__len @property - def iter(self) -> Iterator[ImplPmuCpuFreqDetail]: + def iter(self): return self.__iter -def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: +def PmuReadCpuFreqDetail(): """ struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); """ @@ -2043,7 +2041,7 @@ def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: return PmuCpuFreqDetail(c_freq_detail_pointer, c_cpu_len.value) -def PmuOpenCpuFreqSampling(period: int) -> None: +def PmuOpenCpuFreqSampling(period): """ int PmuOpenCpuFreqSampling(unsigned period); """ @@ -2052,7 +2050,7 @@ def PmuOpenCpuFreqSampling(period: int) -> None: c_period = ctypes.c_uint(period) return c_PmuOpenCpuFreqSampling(c_period) -def PmuCloseCpuFreqSampling() -> None: +def PmuCloseCpuFreqSampling(): """ void PmuCloseCpuFreqSampling(); """ diff --git a/python/modules/_libkperf/Symbol.py b/python/modules/_libkperf/Symbol.py index 705f1a3..f18b9e9 100644 --- a/python/modules/_libkperf/Symbol.py +++ b/python/modules/_libkperf/Symbol.py @@ -47,18 +47,18 @@ class 
CtypesSymbol(ctypes.Structure): ] def __init__(self, - addr: int = 0, - module: str = '', - symbolName: str = '', - mangleName: str = '', - fileName: str = '', - lineNum: int = 0, - offset: int = 0, - codeMapEndAddr: int = 0, - codeMapAddr: int = 0, - count: int = 0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + addr= 0, + module= '', + symbolName= '', + mangleName= '', + fileName= '', + lineNum= 0, + offset= 0, + codeMapEndAddr= 0, + codeMapAddr= 0, + count= 0, + *args, **kw): + super(CtypesSymbol, self).__init__(*args, **kw) self.addr = ctypes.c_ulong(addr) self.module = ctypes.c_char_p(module.encode(UTF_8)) @@ -79,16 +79,16 @@ class Symbol: __slots__ = ['__c_sym'] def __init__(self, - addr: int = 0, - module: str = '', - symbolName: str = '', - mangleName: str = '', - fileName: str = '', - lineNum: int = 0, - offset: int = 0, - codeMapEndAddr: int = 0, - codeMapAddr: int = 0, - count: int = 0) -> None: + addr= 0, + module= '', + symbolName= '', + mangleName= '', + fileName= '', + lineNum= 0, + offset= 0, + codeMapEndAddr= 0, + codeMapAddr= 0, + count= 0): self.__c_sym = CtypesSymbol( addr=addr, module=module, @@ -103,91 +103,91 @@ class Symbol: ) @property - def c_sym(self) -> CtypesSymbol: + def c_sym(self): return self.__c_sym @property - def addr(self) -> int: + def addr(self): return self.c_sym.addr @addr.setter - def addr(self, addr: int) -> None: + def addr(self, addr): self.c_sym.addr = ctypes.c_ulong(addr) @property - def module(self) -> str: + def module(self): return self.c_sym.module.decode(UTF_8) @module.setter - def module(self, module: str) -> None: + def module(self, module): self.c_sym.module = ctypes.c_char_p(module.encode(UTF_8)) @property - def symbolName(self) -> str: + def symbolName(self): return self.c_sym.symbolName.decode(UTF_8) @symbolName.setter - def symbolName(self, symbolName: str) -> None: + def symbolName(self, symbolName): self.c_sym.symbolName = ctypes.c_char_p(symbolName.encode(UTF_8)) @property - def 
mangleName(self) -> str: + def mangleName(self): return self.c_sym.mangleName.decode(UTF_8) @mangleName.setter - def mangleName(self, mangleName: str) -> None: + def mangleName(self, mangleName): self.c_sym.mangleName = ctypes.c_char_p(mangleName.encode(UTF_8)) @property - def fileName(self) -> str: + def fileName(self): return self.c_sym.fileName.decode(UTF_8) @fileName.setter - def fileName(self, fileName: str) -> None: + def fileName(self, fileName): self.c_sym.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @property - def lineNum(self) -> int: + def lineNum(self): return self.c_sym.lineNum @lineNum.setter - def lineNum(self, lineNum: int) -> None: + def lineNum(self, lineNum): self.c_sym.lineNum = ctypes.c_uint(lineNum) @property - def offset(self) -> int: + def offset(self): return self.c_sym.offset @offset.setter - def offset(self, offset: int) -> None: + def offset(self, offset): self.c_sym.offset = ctypes.c_ulong(offset) @property - def codeMapEndAddr(self) -> int: + def codeMapEndAddr(self): return self.c_sym.codeMapEndAddr @codeMapEndAddr.setter - def codeMapEndAddr(self, codeMapEndAddr: int) -> None: + def codeMapEndAddr(self, codeMapEndAddr): self.c_sym.codeMapEndAddr = ctypes.c_ulong(codeMapEndAddr) @property - def codeMapAddr(self) -> int: + def codeMapAddr(self): return self.c_sym.codeMapAddr @codeMapAddr.setter - def codeMapAddr(self, codeMapAddr: int) -> None: + def codeMapAddr(self, codeMapAddr): self.c_sym.codeMapAddr = ctypes.c_ulong(codeMapAddr) @property - def count(self) -> int: + def count(self): return self.c_sym.count @count.setter - def count(self, count: int) -> None: + def count(self, count): self.c_sym.count = ctypes.c_uint64(count) @classmethod - def from_c_sym(cls, c_sym: CtypesSymbol) -> 'Symbol': + def from_c_sym(cls, c_sym): symbol = cls() symbol.__c_sym = c_sym return symbol @@ -213,15 +213,15 @@ CtypesStack._fields_ = [ ] -class Stack: +class Stack(object): __slots__ = ['__c_stack'] def __init__(self, - symbol: Symbol = 
None, - next: 'Stack' = None, - prev: 'Stack' = None, - count: int = 0) -> None: + symbol= None, + next = None, + prev = None, + count= 0): self.__c_stack = CtypesStack( symbol=symbol.c_sym if symbol else None, next=next.c_stack if next else None, @@ -230,45 +230,45 @@ class Stack: ) @property - def c_stack(self) -> CtypesStack: + def c_stack(self): return self.__c_stack @property - def symbol(self) -> Symbol: + def symbol(self): return Symbol.from_c_sym(self.c_stack.symbol.contents) if self.c_stack.symbol else None @symbol.setter - def symbol(self, symbol: Symbol) -> None: + def symbol(self, symbol): self.c_stack.symbol = symbol.c_sym if symbol else None @property - def next(self) -> 'Stack': + def next(self): return self.from_c_stack(self.c_stack.next.contents) if self.c_stack.next else None @next.setter - def next(self, next: 'Stack') -> None: + def next(self, next): self.c_stack.next = next.c_stack if next else None @property - def prev(self) -> 'Stack': + def prev(self): return self.from_c_stack(self.c_stack.prev.contents) if self.c_stack.prev else None @prev.setter - def prev(self, prev: 'Stack') -> None: + def prev(self, prev): self.c_stack.prev = prev.c_stack if prev else None @property - def count(self) -> int: + def count(self): return self.c_stack.count @count.setter - def count(self, count: int) -> None: + def count(self, count): self.c_stack.count = ctypes.c_uint64(count) @classmethod - def from_c_stack(cls, c_stack: CtypesStack) -> 'Stack': + def from_c_stack(cls, c_stack): stack = cls() stack.__c_stack = c_stack return stack @@ -292,12 +292,12 @@ class CtypesAsmCode(ctypes.Structure): ] def __init__(self, - addr: int = 0, - code: str = '', - fileName: str = '', - lineNum: int = 0, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + addr= 0, + code= '', + fileName= '', + lineNum= 0, + *args, **kw): + super(CtypesAsmCode, self).__init__(*args, **kw) self.addr = ctypes.c_ulong(addr) self.code = ctypes.c_char_p(code.encode(UTF_8)) 
self.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @@ -309,10 +309,10 @@ class AsmCode: __slots__ = ['__c_asm_code'] def __init__(self, - addr: int = 0, - code: str = '', - fileName: str = '', - lineNum: int = 0) -> None: + addr= 0, + code= '', + fileName= '', + lineNum= 0): self.__c_asm_code = CtypesAsmCode( addr=addr, code=code, @@ -321,43 +321,43 @@ class AsmCode: ) @property - def c_asm_code(self) -> CtypesAsmCode: + def c_asm_code(self): return self.__c_asm_code @property - def addr(self) -> int: + def addr(self): return self.c_asm_code.addr @addr.setter - def addr(self, addr: int) -> None: + def addr(self, addr): self.c_asm_code.addr = ctypes.c_ulong(addr) @property - def code(self) -> str: + def code(self): return self.c_asm_code.code.decode(UTF_8) @code.setter - def code(self, code: str) -> None: + def code(self, code): self.c_asm_code.code = ctypes.c_char_p(code.encode(UTF_8)) @property - def fileName(self) -> str: + def fileName(self): return self.c_asm_code.fileName.decode(UTF_8) @fileName.setter - def fileName(self, fileName: str) -> None: + def fileName(self, fileName): self.c_asm_code.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @property - def lineNum(self) -> int: + def lineNum(self): return self.c_asm_code.lineNum @lineNum.setter - def lineNum(self, lineNum: int) -> None: + def lineNum(self, lineNum): self.c_asm_code.lineNum = ctypes.c_uint(lineNum) @classmethod - def from_c_asm_code(cls, c_asm_code: CtypesAsmCode) -> 'AsmCode': + def from_c_asm_code(cls, c_asm_code): asm_code = cls() asm_code.__c_asm_code = c_asm_code return asm_code @@ -383,13 +383,13 @@ class CtypesStackAsm(ctypes.Structure): ] def __init__(self, - fileName: str = '', - funcStartAddr: int = 0, - functFileOffset: int = 0, - next: 'CtypesStackAsm' = None, - asmCode: CtypesAsmCode = None, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + fileName= '', + funcStartAddr= 0, + functFileOffset= 0, + next = None, + asmCode= None, + *args, **kw): + 
super(CtypesStackAsm, self).__init__(*args, **kw) self.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) self.funcStartAddr = ctypes.c_ulong(funcStartAddr) self.functFileOffset = ctypes.c_ulong(functFileOffset) @@ -402,11 +402,11 @@ class StackAsm: __slots__ = ['__c_stack_asm'] def __init__(self, - fileName: str = '', - funcStartAddr: int = 0, - functFileOffset: int = 0, - next: 'StackAsm' = None, - asmCode: AsmCode = None) -> None: + fileName= '', + funcStartAddr= 0, + functFileOffset= 0, + next = None, + asmCode= None): self.__c_stack_asm = CtypesStackAsm( fileName=fileName, funcStartAddr=funcStartAddr, @@ -416,51 +416,51 @@ class StackAsm: ) @property - def c_stack_asm(self) -> CtypesStackAsm: + def c_stack_asm(self): return self.__c_stack_asm @property - def fileName(self) -> str: + def fileName(self): return self.c_stack_asm.fileName.decode(UTF_8) @fileName.setter - def fileName(self, fileName: str) -> None: + def fileName(self, fileName): self.c_stack_asm.fileName = ctypes.c_char_p(fileName.encode(UTF_8)) @property - def funcStartAddr(self) -> int: + def funcStartAddr(self): return self.c_stack_asm.funcStartAddr @funcStartAddr.setter - def funcStartAddr(self, funcStartAddr: int) -> None: + def funcStartAddr(self, funcStartAddr): self.c_stack_asm.funcStartAddr = ctypes.c_ulong(funcStartAddr) @property - def functFileOffset(self) -> int: + def functFileOffset(self): return self.c_stack_asm.functFileOffset @functFileOffset.setter - def functFileOffset(self, functFileOffset: int) -> None: + def functFileOffset(self, functFileOffset): self.c_stack_asm.functFileOffset = ctypes.c_ulong(functFileOffset) @property - def next(self) -> 'StackAsm': + def next(self): return self.from_c_stack_asm(self.c_stack_asm.next.contents) if self.c_stack_asm.next else None @next.setter - def next(self, next: 'StackAsm') -> None: + def next(self, next): self.c_stack_asm.next = next.c_stack_asm if next else None @property - def asmCode(self) -> AsmCode: + def asmCode(self): return 
AsmCode.from_c_asm_code(self.c_stack_asm.asmCode.contents) if self.c_stack_asm.asmCode else None @asmCode.setter - def asmCode(self, asmCode: AsmCode) -> None: + def asmCode(self, asmCode): self.c_stack_asm.asmCode = asmCode.c_asm_code if asmCode else None @classmethod - def from_c_stack_asm(cls, c_stack_asm: CtypesStackAsm) -> 'StackAsm': + def from_c_stack_asm(cls, c_stack_asm): stack_asm = cls() stack_asm.__c_stack_asm = c_stack_asm return stack_asm @@ -492,15 +492,15 @@ class CtypesProcTopology(ctypes.Structure): ] def __init__(self, - pid: int = 0, - tid: int = 0, - ppid: int = 0, - childPid: List[int] = None, - comm: str = '', - exe: str = '', - kernel: bool = False, - *args: Any, **kw: Any) -> None: - super().__init__(*args, **kw) + pid= 0, + tid= 0, + ppid= 0, + childPid= None, + comm= '', + exe= '', + kernel= False, + *args, **kw): + super(CtypesProcTopology, self).__init__(*args, **kw) self.pid = ctypes.c_int(pid) self.tid = ctypes.c_int(tid) self.ppid = ctypes.c_int(ppid) @@ -521,13 +521,13 @@ class ProcTopology: __slots__ = ['__c_proc_topology'] def __init__(self, - pid: int = 0, - tid: int = 0, - ppid: int = 0, - childPid: List[int] = None, - comm: str = '', - exe: str = '', - kernel: bool = False) -> None: + pid= 0, + tid= 0, + ppid= 0, + childPid= None, + comm= '', + exe= '', + kernel= False): self.__c_proc_topology = CtypesProcTopology( pid = pid, tid=tid, @@ -539,44 +539,44 @@ class ProcTopology: ) @property - def c_proc_topology(self) -> CtypesProcTopology: + def c_proc_topology(self): return self.__c_proc_topology @property - def pid(self) -> int: + def pid(self): return self.c_proc_topology.pid @pid.setter - def pid(self, pid: int) -> None: + def pid(self, pid): self.c_proc_topology.pid = ctypes.c_int(pid) @property - def tid(self) -> int: + def tid(self): return self.c_proc_topology.tid @tid.setter - def tid(self, tid: int) -> None: + def tid(self, tid): self.c_proc_topology.tid = ctypes.c_int(tid) @property - def ppid(self) -> int: + def 
ppid(self): return self.c_proc_topology.ppid @ppid.setter - def ppid(self, ppid: int) -> None: + def ppid(self, ppid): self.c_proc_topology.ppid = ctypes.c_int(ppid) @property - def numChild(self) -> int: + def numChild(self): return self.c_proc_topology.numChild @property - def childPid(self) -> List[int]: + def childPid(self): return [self.c_proc_topology.childPid[i] for i in range(self.numChild)] @childPid.setter - def childPid(self, childPid: List[int]) -> None: + def childPid(self, childPid): if childPid: numChildPid = len(childPid) self.c_proc_topology.childPid = (ctypes.c_int * numChildPid)(*childPid) @@ -586,29 +586,29 @@ class ProcTopology: self.c_proc_topology.numChild = ctypes.c_int(0) @property - def comm(self) -> str: + def comm(self): return self.c_proc_topology.comm.decode(UTF_8) @comm.setter - def comm(self, comm: str) -> None: + def comm(self, comm): self.c_proc_topology.comm = ctypes.c_char_p(comm.encode(UTF_8)) @property - def exe(self) -> str: + def exe(self): return self.c_proc_topology.exe.decode(UTF_8) @exe.setter - def exe(self, exe: str) -> None: + def exe(self, exe): self.c_proc_topology.exe = ctypes.c_char_p(exe.encode(UTF_8)) @classmethod - def from_c_proc_topology(cls, c_proc_topology: CtypesProcTopology) -> 'ProcTopology': + def from_c_proc_topology(cls, c_proc_topology): proc_topology = cls() proc_topology.__c_proc_topology = c_proc_topology return proc_topology -def SymResolverRecordKernel() -> None: +def SymResolverRecordKernel(): """ int SymResolverRecordKernel(); """ @@ -619,7 +619,7 @@ def SymResolverRecordKernel() -> None: c_SymResolverRecordKernel() -def SymResolverRecordModule(pid: int) -> None: +def SymResolverRecordModule(pid): """ int SymResolverRecordModule(int pid); """ @@ -632,7 +632,7 @@ def SymResolverRecordModule(pid: int) -> None: c_SymResolverRecordModule(c_pid) -def SymResolverRecordModuleNoDwarf(pid: int) -> None: +def SymResolverRecordModuleNoDwarf(pid): """ int SymResolverRecordModuleNoDwarf(int pid); """ @@ 
-645,7 +645,7 @@ def SymResolverRecordModuleNoDwarf(pid: int) -> None: c_SymResolverRecordModuleNoDwarf(c_pid) -def StackToHash(pid: int, stackList: List[int]) -> Stack: +def StackToHash(pid, stackList): """ struct Stack* StackToHash(int pid, unsigned long* stack, int nr); """ @@ -664,7 +664,7 @@ def StackToHash(pid: int, stackList: List[int]) -> Stack: return Stack.from_c_stack(c_stack.contents) -def SymResolverMapAddr(pid: int, addr: int) -> Symbol: +def SymResolverMapAddr(pid, addr): """ struct Symbol* SymResolverMapAddr(int pid, unsigned long addr); """ @@ -681,7 +681,7 @@ def SymResolverMapAddr(pid: int, addr: int) -> Symbol: return Symbol.from_c_sym(c_sym.contents) -def FreeModuleData(pid: int) -> None: +def FreeModuleData(pid): """ void FreeModuleData(int pid); """ @@ -694,7 +694,7 @@ def FreeModuleData(pid: int) -> None: c_FreeModuleData(c_pid) -def SymResolverDestroy() -> None: +def SymResolverDestroy(): """ void SymResolverDestroy(); """ diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 20776de..5806a15 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -119,28 +119,28 @@ class Error: LIBPERF_WARN_PCIE_BIOS_NOT_NEWEST = 1003 LIBPERF_WARN_INVALID_SMMU_BDF = 1004 -def errorno() -> int: +def errorno(): """ Obtaining error codes """ return _libkperf.Perrorno() -def error()-> str: +def error(): """ Obtaining Error Information """ return _libkperf.Perror() -def get_warn() -> int: +def get_warn(): """ Get warning codes """ return _libkperf.GetWarn() -def get_warn_msg()-> str: +def get_warn_msg(): """ Get warning message """ diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 213125d..cce26c5 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -164,7 +164,7 @@ class PmuDeviceAttr(_libkperf.PmuDeviceAttr): }; """ def __init__(self, metric, bdf=None): - super().__init__( + super(PmuDeviceAttr, self).__init__( metric=metric, bdf=bdf ) @@ -253,23 
+253,23 @@ class PmuAttr(_libkperf.PmuAttr): includeNewFork: In count mode, enable it you can get the new child thread count, default is disabled. """ def __init__(self, - evtList: List[str] = None, - pidList: List[int] = None, - cpuList: List[int] = None, - evtAttr: List[_libkperf.CtypesEvtAttr] = None, - sampleRate: int = 0, - useFreq: bool = False, - excludeUser: bool = False, - excludeKernel: bool = False, - symbolMode: int = 0, - callStack: bool = False, - blockedSample: bool = False, - dataFilter: int = 0, - evFilter: int = 0, - minLatency: int = 0, - includeNewFork: bool = False, - branchSampleFilter: int = 0) -> None: - super().__init__( + evtList = None, + pidList = None, + cpuList = None, + evtAttr = None, + sampleRate = 0, + useFreq = False, + excludeUser = False, + excludeKernel = False, + symbolMode = 0, + callStack = False, + blockedSample = False, + dataFilter = 0, + evFilter = 0, + minLatency = 0, + includeNewFork = False, + branchSampleFilter = 0): + super(PmuAttr, self).__init__( evtList=evtList, pidList=pidList, cpuList=cpuList, @@ -326,10 +326,10 @@ class PmuTraceAttr(_libkperf.PmuTraceAttr): }; """ def __init__(self, - funcs: List[str] = None, - pidList: List[int] = None, - cpuList: List[int] = None) -> None: - super().__init__( + funcs = None, + pidList = None, + cpuList = None): + super(PmuTraceAttr, self).__init__( funcs=funcs, pidList=pidList, cpuList=cpuList @@ -341,7 +341,7 @@ class ImplPmuTraceData(_libkperf.ImplPmuTraceData): class PmuTraceData(_libkperf.PmuTraceData): pass -def open(collect_type: PmuTaskType, pmu_attr: PmuAttr) -> int: +def open(collect_type, pmu_attr): """ Initialize the collection target. On success, a task id is returned which is the unique identifier of the task. 
@@ -354,7 +354,7 @@ def open(collect_type: PmuTaskType, pmu_attr: PmuAttr) -> int: return _libkperf.PmuOpen(int(collect_type), pmu_attr) -def event_list(event_type: PmuEventType)-> Iterator[str]: +def event_list(event_type): """ Query all available event from system. :param event_type: type of event chosen by user @@ -363,7 +363,7 @@ def event_list(event_type: PmuEventType)-> Iterator[str]: return _libkperf.PmuEventList(int(event_type)) -def enable(pd: int)-> int: +def enable(pd): """ Enable counting or sampling of task . On success, 0 is returned. @@ -374,7 +374,7 @@ def enable(pd: int)-> int: return _libkperf.PmuEnable(pd) -def disable(pd: int)-> int: +def disable(pd): """ Disable counting or sampling of task . On success, 0 is returned. @@ -385,7 +385,7 @@ def disable(pd: int)-> int: return _libkperf.PmuDisable(pd) -def read(pd: int) -> PmuData: +def read(pd): """ Collect data. Pmu data are collected starting from the last PmuEnable or PmuRead. @@ -396,7 +396,7 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) -def resolvePmuDataSymbol(pmuData: PmuData) -> int: +def resolvePmuDataSymbol(pmuData): """ when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols :param: pmuData @@ -405,7 +405,7 @@ def resolvePmuDataSymbol(pmuData: PmuData) -> int: return _libkperf.ResolvePmuDataSymbol(pmuData.pointer()) -def stop(pd: int) -> None: +def stop(pd): """ stop a sampling task in asynchronous mode :param pd: task id. @@ -413,7 +413,7 @@ def stop(pd: int) -> None: return _libkperf.PmuStop(pd) -def close(pd: int) -> None: +def close(pd): """ Close task with id . After PmuClose is called, all pmu data related to the task become invalid. @@ -422,7 +422,7 @@ def close(pd: int) -> None: return _libkperf.PmuClose(pd) -def dump(pmuData: PmuData, filepath: str, dump_dwf: int) -> None: +def dump(pmuData, filepath, dump_dwf): """ /** Dump pmu data to a specific file. 
@@ -437,7 +437,7 @@ def dump(pmuData: PmuData, filepath: str, dump_dwf: int) -> None: return _libkperf.PmuDumpData(pmuData, filepath, dump_dwf) -def get_field(pmu_data: _libkperf.ImplPmuData, field_name: str, value: c_void_p) -> int: +def get_field(pmu_data, field_name, value): """ get field value of trace pointer named field_name :param pmu_data: _libkperf.ImplPmuData @@ -448,7 +448,7 @@ def get_field(pmu_data: _libkperf.ImplPmuData, field_name: str, value: c_void_p) return _libkperf.PmuGetField(pmu_data.rawData.c_pmu_data_rawData, field_name, value, sizeof(value)) -def get_field_exp(pmu_data: _libkperf.ImplPmuData, field_name: str) -> SampleRawField: +def get_field_exp(pmu_data, field_name): """ get the field detail of trace pointer event :param pmu_data: the _libkperf.ImplPmuData @@ -457,7 +457,7 @@ def get_field_exp(pmu_data: _libkperf.ImplPmuData, field_name: str) -> SampleRaw """ return _libkperf.PmuGetFieldExp(pmu_data.rawData.c_pmu_data_rawData, field_name) -def device_bdf_list(bdf_type: PmuBdfType) -> Iterator[str]: +def device_bdf_list(bdf_type): """ Query all available BDF (Bus:Device.Function) list from system. :param bdf_type: type of bdf chosen by user @@ -465,7 +465,7 @@ def device_bdf_list(bdf_type: PmuBdfType) -> Iterator[str]: """ return _libkperf.PmuDeviceBdfList(int(bdf_type)) -def device_open(device_attr: List[PmuDeviceAttr]) -> int: +def device_open(device_attr): """ A high level interface for initializing PMU events for devices, such as L3 cache, DDRC, PCIe, and SMMU, to collect metrics like bandwidth, latency, and others. 
@@ -475,7 +475,7 @@ def device_open(device_attr: List[PmuDeviceAttr]) -> int: """ return _libkperf.PmuDeviceOpen(device_attr) -def get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> PmuDeviceData: +def get_device_metric(pmu_data, device_attr): """ Get device performance metric data from pmu data :param pmu_data: raw data collected by pmu @@ -485,7 +485,7 @@ def get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) -> Pm return _libkperf.PmuGetDevMetric(pmu_data, device_attr) -def get_cpu_freq(core: int) -> int: +def get_cpu_freq(core): """ Get cpu frequency :param core: cpu core id @@ -494,7 +494,7 @@ def get_cpu_freq(core: int) -> int: return _libkperf.PmuGetCpuFreq(core) -def get_cluster_core(clusterId: int) -> List[int]: +def get_cluster_core(clusterId): """ Get the list of core in a cluster :param cluster: cluster id @@ -502,7 +502,7 @@ def get_cluster_core(clusterId: int) -> List[int]: """ return _libkperf.PmuGetClusterCore(clusterId) -def get_numa_core(numaId: int) -> List[int]: +def get_numa_core(numaId): """ Get the list of core in a numa node :param numaId: numa node id @@ -510,37 +510,37 @@ def get_numa_core(numaId: int) -> List[int]: """ return _libkperf.PmuGetNumaCore(numaId) -def trace_open(trace_type: PmuTraceType, pmu_trace_attr: PmuTraceAttr) -> int: +def trace_open(trace_type, pmu_trace_attr): """ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr); """ return _libkperf.PmuTraceOpen(int(trace_type), pmu_trace_attr) -def trace_enable(pd: int) -> int: +def trace_enable(pd): """ int PmuTraceEnable(int pd); """ return _libkperf.PmuTraceEnable(pd) -def trace_disable(pd: int) -> int: +def trace_disable(pd): """ int PmuTraceDisable(int pd); """ return _libkperf.PmuTraceDisable(pd) -def trace_read(pd: int) -> PmuTraceData: +def trace_read(pd): """ int PmuTraceRead(int pd, struct PmuTraceData **traceData); """ return _libkperf.PmuTraceRead(pd) -def trace_close(pd: int) -> None: +def 
trace_close(pd): """ void PmuTraceClose(int pd); """ return _libkperf.PmuTraceClose(pd) -def sys_call_func_list() -> Iterator[str]: +def sys_call_func_list(): """ get the system call function list :return: system call function list @@ -550,13 +550,13 @@ def sys_call_func_list() -> Iterator[str]: class CpuFreqDetail(_libkperf.PmuCpuFreqDetail): pass -def open_cpu_freq_sampling(period: int) -> None: +def open_cpu_freq_sampling(period): return _libkperf.PmuOpenCpuFreqSampling(period) -def close_cpu_freq_sampling() -> None: +def close_cpu_freq_sampling(): return _libkperf.PmuCloseCpuFreqSampling() -def read_cpu_freq_detail() -> CpuFreqDetail: +def read_cpu_freq_detail(): return _libkperf.PmuReadCpuFreqDetail() __all__ = [ diff --git a/python/modules/ksym/symbol.py b/python/modules/ksym/symbol.py index 0e7099e..dfc34fc 100644 --- a/python/modules/ksym/symbol.py +++ b/python/modules/ksym/symbol.py @@ -20,16 +20,16 @@ import _libkperf class Symbol(_libkperf.Symbol): def __init__(self, - addr: int = 0, - module: str = '', - symbolName: str = '', - fileName: str = '', - lineNum: int = 0, - offset: int = 0, - codeMapEndAddr: int = 0, - codeMapAddr: int = 0, - count: int = 0) -> None: - super().__init__( + addr = 0, + module = '', + symbolName = '', + fileName = '', + lineNum = 0, + offset = 0, + codeMapEndAddr = 0, + codeMapAddr = 0, + count = 0): + super(Symbol, self).__init__( addr=addr, module=module, symbolName=symbolName, @@ -45,11 +45,11 @@ class Symbol(_libkperf.Symbol): class Stack(_libkperf.Stack): def __init__(self, - symbol: Symbol = None, - next: 'Stack' = None, - prev: 'Stack' = None, - count: int = 0) -> None: - super().__init__( + symbol = None, + next = None, + prev = None, + count = 0): + super(Stack, self).__init__( symbol=symbol.c_sym if symbol else None, next=next.c_stack if next else None, prev=prev.c_stack if prev else None, @@ -57,39 +57,39 @@ class Stack(_libkperf.Stack): ) -def record_kernel() -> None: +def record_kernel(): 
_libkperf.SymResolverRecordKernel() -def record_module(pid: int, dwarf: bool = True) -> None: +def record_module(pid, dwarf = True): if dwarf: _libkperf.SymResolverRecordModule(pid) else: _libkperf.SymResolverRecordModuleNoDwarf(pid) -def get_stack(pid: int, stacks: List[int]) -> Iterator[Stack]: +def get_stack(pid, stacks): """ Convert a callstack to an unsigned long long hashid """ return _libkperf.StackToHash(pid, stacks) -def get_symbol(pid: int, addr: int) -> Symbol: +def get_symbol(pid, addr): """ Map a specific address to a symbol """ return _libkperf.SymResolverMapAddr(pid, addr) -def free_module(pid: int) -> None: +def free_module(pid): """ free pid module data """ _libkperf.FreeModuleData(pid) -def destroy() -> None: +def destroy(): _libkperf.SymResolverDestroy() @@ -102,4 +102,4 @@ __all__ = [ 'get_symbol', 'free_module', 'destroy', -] +] \ No newline at end of file -- Gitee From 259fa98eec6237640d43df2096ae140cbd8c5b10 Mon Sep 17 00:00:00 2001 From: glx Date: Thu, 19 Jun 2025 20:00:12 +0800 Subject: [PATCH 41/48] =?UTF-8?q?Implement=20group=20read=20for=20group=20?= =?UTF-8?q?events=20=E5=AE=9E=E7=8E=B0group=20read=EF=BC=9A=20=E5=BD=93?= =?UTF-8?q?=E4=BD=BF=E7=94=A8=E4=BA=8B=E4=BB=B6=E5=88=86=E7=BB=84=E7=9A=84?= =?UTF-8?q?=E6=97=B6=E5=80=99=EF=BC=8C=E5=8F=AA=E6=9C=89group=20leader?= =?UTF-8?q?=E9=9C=80=E8=A6=81read=E8=AE=A1=E6=95=B0=EF=BC=8C=E5=85=B6?= =?UTF-8?q?=E4=BD=99=E7=9A=84group=20members=E6=97=A0=E9=9C=80read?= =?UTF-8?q?=E3=80=82=20=E8=BF=99=E6=A0=B7=E5=87=8F=E5=B0=91=E4=BA=86read?= =?UTF-8?q?=E7=9A=84=E6=AC=A1=E6=95=B0=EF=BC=8C=E6=8F=90=E5=8D=87=E4=BA=86?= =?UTF-8?q?=E6=80=A7=E8=83=BD=EF=BC=9B=E8=80=8C=E4=B8=94=E8=83=BD=E4=BF=9D?= =?UTF-8?q?=E8=AF=81group=E7=9A=84=E6=89=80=E6=9C=89=E4=BA=8B=E4=BB=B6?= =?UTF-8?q?=E9=83=BD=E6=98=AF=E5=9C=A8=E5=90=8C=E6=97=B6=E8=A2=AB=E8=AF=BB?= =?UTF-8?q?=E5=8F=96=EF=BC=8C=E6=89=80=E6=9C=89=E4=BA=8B=E4=BB=B6=E7=9A=84?= =?UTF-8?q?=E9=87=87=E9=9B=86=E6=97=B6=E9=95=BF=E8=83=BD=E5=AE=8C=E5=85=A8?= 
=?UTF-8?q?=E4=B8=80=E8=87=B4=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实现方法: - 在Open的时候,对于group events,read_format加上PERF_FORMAT_GROUP。 - 在Read的时候,对于group events,读取所有member的计数,并且把每个计数和事件名词对应起来。 --- pmu/evt_list.cpp | 21 ++++- pmu/evt_list.h | 5 + pmu/perf_counter.cpp | 124 +++++++++++++++++++++---- pmu/perf_counter.h | 25 ++++- pmu/pmu_list.cpp | 3 +- pmu/pmu_metric.cpp | 6 +- test/test_perf/test_metric.cpp | 28 ++---- test/test_perf/test_trace_analysis.cpp | 7 +- test/test_perf/test_trace_pointer.cpp | 3 - 9 files changed, 162 insertions(+), 60 deletions(-) diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index d12f1df..29bcf1d 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -91,7 +91,9 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrSetBranchSampleFilter(branchSampleFilter); int err = 0; if (groupEnable) { - err = perfEvt->Init(groupEnable, evtLeader->xyCounterArray[row][col]->GetFd(), resetOutPutFd); + // If evtLeader is nullptr, I am the leader. + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][col]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, resetOutPutFd); } else { err = perfEvt->Init(groupEnable, -1, resetOutPutFd); } @@ -168,7 +170,14 @@ void KUNPENG_PMU::EvtList::FillFields( { for (auto i = start; i < end; ++i) { data[i].cpuTopo = cpuTopo; - data[i].evt = this->pmuEvt->name.c_str(); + if (groupInfo && pmuEvt->collectType == COUNTING && i - start > 0) { + // For group events, PmuData are all read by event leader, + // and then some PmuData elements should be related to group members. + data[i].evt = groupInfo->evtGroupChildList[i-start-1]->pmuEvt->name.c_str(); + } else { + // For no group events or group leader. 
+ data[i].evt = this->pmuEvt->name.c_str(); + } data[i].groupId = this->groupId; if (data[i].comm == nullptr) { data[i].comm = procTopo->comm; @@ -269,7 +278,8 @@ void KUNPENG_PMU::EvtList::AddNewProcess(pid_t pid, const bool groupEnable, cons int err = 0; if (groupEnable) { int sz = this->pidList.size(); - err = perfEvt->Init(groupEnable, evtLeader->xyCounterArray[row][sz - 1]->GetFd(), -1); + auto groupFd = evtLeader?evtLeader->xyCounterArray[row][sz - 1]->GetFd():-1; + err = perfEvt->Init(groupEnable, groupFd, -1); } else { err = perfEvt->Init(groupEnable, -1, -1); } @@ -348,4 +358,9 @@ void KUNPENG_PMU::EvtList::ClearExitFd() procMap.erase(exitPid); numPid--; } +} + +void KUNPENG_PMU::EvtList::SetGroupInfo(const EventGroupInfo &grpInfo) +{ + this->groupInfo = unique_ptr(new EventGroupInfo(grpInfo)); } \ No newline at end of file diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 6764d4d..d064143 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -49,6 +49,7 @@ enum class UncoreState { OnlyOther = 0b01, }; +struct EventGroupInfo; class EvtList { public: using ProcPtr = std::shared_ptr; @@ -72,6 +73,8 @@ public: int Read(std::vector& pmuData, std::vector& sampleIps, std::vector& extPool, std::vector& switchData); + void SetGroupInfo(const EventGroupInfo &grpInfo); + void SetTimeStamp(const int64_t& timestamp) { this->ts = timestamp; @@ -134,6 +137,8 @@ private: int prevStat; int evtStat; std::mutex mutex; + // Fixme: decouple group event with normal event, use different classes to implement Read and Init. 
+ std::unique_ptr groupInfo = nullptr; }; struct EventGroupInfo { diff --git a/pmu/perf_counter.cpp b/pmu/perf_counter.cpp index 7a212fe..9b2db6d 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -30,8 +30,20 @@ #include "perf_counter.h" using namespace std; +using namespace pcerr; static constexpr int MAX_ATTR_SIZE = 120; + +struct GroupReadFormat { + __u64 nr; + __u64 timeEnabled; + __u64 timeRunning; + struct { + __u64 value; + __u64 id; + } values[]; +}; + /** * Read pmu counter and deal with pmu multiplexing * Right now we do not implement grouping logic, thus we ignore the @@ -40,18 +52,90 @@ static constexpr int MAX_ATTR_SIZE = 120; int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &swtichData) { - struct ReadFormat perfCountValue; - - /** - * If some how the file descriptor is less than 0, - * we make the count to be 0 and return - */ if (__glibc_unlikely(this->fd < 0)) { - this->count = 0; + this->accumCount.clear(); + return UNKNOWN_ERROR; + } + + if (groupStatus == GroupStatus::NO_GROUP) { + return ReadSingleEvent(data); + } else if (groupStatus == GroupStatus::GROUP_LEADER) { + return ReadGroupEvents(data); + } + + // Group members do not need to read counters, + // Group leader will read them all. 
+ return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadSingleEvent(std::vector &data) +{ + ReadFormat perfCountValue; + int len = read(this->fd, &perfCountValue, sizeof(perfCountValue)); + if (len < 0) { + New(UNKNOWN_ERROR, strerror(errno)); + return UNKNOWN_ERROR; + } + if (accumCount.empty()) { + accumCount.assign(1, 0); + } + + int err = CountValueToData(perfCountValue.value, perfCountValue.timeEnabled, + perfCountValue.timeRunning, accumCount[0], data); + if (err != SUCCESS) { + return err; + } + + this->enabled = perfCountValue.timeEnabled; + this->running = perfCountValue.timeRunning; + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::ReadGroupEvents(std::vector &data) +{ + // Fixme: + // In current class, we do not know how many events in group. + // Then we read for max struct size: nr+timeEnabled+timeRunning+ MAX_GROUP_EVENTS*(value+id) + static const unsigned MAX_GROUP_EVENTS = 14; + unsigned readSize = sizeof(__u64)*3 + sizeof(__u64)*2*MAX_GROUP_EVENTS; + GroupReadFormat *perfCountValue = static_cast(malloc(readSize)); + if (perfCountValue == NULL) { + return COMMON_ERR_NOMEM; + } + int len = read(this->fd, perfCountValue, readSize); + if (len < 0) { + free(perfCountValue); + New(UNKNOWN_ERROR, strerror(errno)); return UNKNOWN_ERROR; } - read(this->fd, &perfCountValue, sizeof(perfCountValue)); - if (perfCountValue.value < count || perfCountValue.timeEnabled < enabled || perfCountValue.timeRunning < running) { + + if (accumCount.empty()) { + accumCount.assign(perfCountValue->nr, 0); + } + + for (int i = 0;i < accumCount.size(); ++i) { + auto err = CountValueToData(perfCountValue->values[i].value, + perfCountValue->timeEnabled, + perfCountValue->timeRunning, + accumCount[i], + data + ); + if (err != SUCCESS) { + free(perfCountValue); + return err; + } + } + + this->enabled = perfCountValue->timeEnabled; + this->running = perfCountValue->timeRunning; + free(perfCountValue); + return SUCCESS; +} + +int KUNPENG_PMU::PerfCounter::CountValueToData(const 
__u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, vector &data) +{ + if (value < accumCount || timeEnabled < enabled || timeRunning < running) { return LIBPERF_ERR_COUNT_OVERFLOW; } @@ -60,17 +144,14 @@ int KUNPENG_PMU::PerfCounter::Read(vector &data, std::vector(perfCountValue.timeEnabled - enabled) / static_cast(perfCountValue.timeRunning - running); - increCount = static_cast((perfCountValue.value - count)* percent); + percent = static_cast(timeEnabled - enabled) / static_cast(timeRunning - running); + increCount = static_cast((value - accumCount)* percent); } - - this->count = perfCountValue.value; - this->enabled = perfCountValue.timeEnabled; - this->running = perfCountValue.timeRunning; + accumCount = value; data.emplace_back(PmuData{0}); auto& current = data.back(); @@ -123,7 +204,13 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou * and any child events are initialized with disabled bit set to 0. Despite disabled bit being set to 0, * the child events will not start counting until the group leader is enabled. 
*/ - attr.disabled = 0; + if (groupFd != -1) { + attr.disabled = 0; + groupStatus = GroupStatus::GROUP_MEMBER; + } else { + groupStatus = GroupStatus::GROUP_LEADER; + } + attr.read_format |= PERF_FORMAT_GROUP; this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } else { #ifdef IS_X86 @@ -136,6 +223,7 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou } else { this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } + groupStatus = GroupStatus::NO_GROUP; } this->groupFd = groupFd; DBG_PRINT("type: %d cpu: %d config: %llx config1: %llx config2: %llx myfd: %d groupfd: %d\n", @@ -160,7 +248,7 @@ int KUNPENG_PMU::PerfCounter::Enable() if (err != SUCCESS) { return err; } - this->count = 0; + this->accumCount.clear(); this->enabled = 0; this->running = 0; return SUCCESS; diff --git a/pmu/perf_counter.h b/pmu/perf_counter.h index 585bf58..8937bdb 100644 --- a/pmu/perf_counter.h +++ b/pmu/perf_counter.h @@ -44,11 +44,26 @@ namespace KUNPENG_PMU { int Reset() override; private: - // Accumulated pmu count, time enabled and time running. - __u64 count = 0; - __u64 enabled = 0; - __u64 running = 0; - int groupFd = 0; + enum class GroupStatus + { + NO_GROUP, + GROUP_LEADER, + GROUP_MEMBER + }; + + int CountValueToData(const __u64 value, const __u64 timeEnabled, + const __u64 timeRunning, __u64 &accumCount, std::vector &data); + int ReadSingleEvent(std::vector &data); + int ReadGroupEvents(std::vector &data); + + // Accumulated pmu count, time enabled and time running. + __u64 enabled = 0; + __u64 running = 0; + // For group events, is the accum counts of all members. + // For normal events, has only one element. 
+ std::vector<__u64> accumCount; + int groupFd = 0; + GroupStatus groupStatus = GroupStatus::NO_GROUP; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 96ea5df..9e8feb0 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -199,7 +199,7 @@ namespace KUNPENG_PMU { continue; } if (eventGroupInfoMap.find(evtList->GetGroupId()) == eventGroupInfoMap.end()) { - auto err = EvtInit(false, nullptr, pd, evtList, isMemoryEnough); + auto err = EvtInit(true, nullptr, pd, evtList, isMemoryEnough); if (err != SUCCESS) { return err; } @@ -233,6 +233,7 @@ namespace KUNPENG_PMU { return err; } } + evtGroup.second.evtLeader->SetGroupInfo(evtGroup.second); } groupMapPtr eventDataEvtGroup = std::make_shared>(eventGroupInfoMap); InsertDataEvtGroupList(pd, eventDataEvtGroup); diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 1fe04d7..56cf758 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1722,21 +1722,21 @@ int PmuGetNumaCore(unsigned nodeId, unsigned **coreList) ifstream in(nodeListFile); if (!in.is_open()) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } std::string cpulist; in >> cpulist; auto split = SplitStringByDelimiter(cpulist, '-'); if (split.size() != 2) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } auto start = stoi(split[0]); auto end = stoi(split[1]); int coreNums = end - start + 1; if (coreNums <= 0) { New(LIBPERF_ERR_KERNEL_NOT_SUPPORT); - return LIBPERF_ERR_KERNEL_NOT_SUPPORT; + return -1; } InitializeCoreArray(); *coreList = &coreArray[start]; diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index 68710cb..d6b5e10 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -37,7 +37,7 @@ TEST_F(TestMetric, GetInvalidBdfList) enum PmuBdfType bdfType = (enum PmuBdfType)5; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << 
Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_EQ(bdfList, nullptr); } @@ -46,7 +46,7 @@ TEST_F(TestMetric, GetPcieBdfList) enum PmuBdfType bdfType = PMU_BDF_TYPE_PCIE; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(bdfList, nullptr); } @@ -55,7 +55,7 @@ TEST_F(TestMetric, GetSmmuBdfList) enum PmuBdfType bdfType = PMU_BDF_TYPE_SMMU; unsigned bdfLen = 0; const char** bdfList = PmuDeviceBdfList(bdfType, &bdfLen); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(bdfList, nullptr); } @@ -63,7 +63,7 @@ TEST_F(TestMetric, GetCpuFreq) { unsigned core = 6; int64_t cpu6Freq = PmuGetCpuFreq(core); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(cpu6Freq, -1); } @@ -72,12 +72,8 @@ TEST_F(TestMetric, GetClusterIdListSuccess) unsigned clusterId = 3; unsigned* coreList = nullptr; int len = PmuGetClusterCore(clusterId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(len, -1); - for (int i = 0; i < len; ++i) { - cout << coreList[i] << " "; - } - cout << endl; } TEST_F(TestMetric, GetClusterIdListOverSize) @@ -85,7 +81,7 @@ TEST_F(TestMetric, GetClusterIdListOverSize) unsigned clusterId = 33; unsigned* coreList = nullptr; int len = PmuGetClusterCore(clusterId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_EQ(len, -1); } @@ -94,12 +90,8 @@ TEST_F(TestMetric, GetNumaIdList) unsigned numaId = 2; unsigned* coreList = nullptr; int len = PmuGetNumaCore(numaId, &coreList); - cout << Perror() << endl; + ASSERT_EQ(Perrorno(), SUCCESS); ASSERT_NE(len, -1); - for (int i = 0; i < len; ++i) { - cout << coreList[i] << " "; - } - cout << endl; } TEST_F(TestMetric, CollectDDRBandwidth) @@ -108,7 +100,6 @@ TEST_F(TestMetric, CollectDDRBandwidth) devAttr[0].metric = PMU_DDR_READ_BW; devAttr[1].metric = PMU_DDR_WRITE_BW; int pd = PmuDeviceOpen(devAttr, 2); - cout << 
Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -133,7 +124,6 @@ TEST_F(TestMetric, CollectL3Latency) PmuDeviceAttr devAttr = {}; devAttr.metric = PMU_L3_LAT; int pd = PmuDeviceOpen(&devAttr, 1); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -212,7 +202,6 @@ TEST_F(TestMetric, CollectL3LatencyAndL3Miss) devAttr[1].metric = PMU_L3_MISS; int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -248,7 +237,6 @@ TEST_F(TestMetric, GetMetricPcieBandwidth) } int pd = PmuDeviceOpen(devAttr, bdfLen); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); @@ -276,7 +264,6 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) const char** bdfList = nullptr; unsigned bdfLen = 0; bdfList = PmuDeviceBdfList(PMU_BDF_TYPE_SMMU, &bdfLen); - cout << Perror() << endl; ASSERT_NE(bdfList, nullptr); PmuDeviceAttr devAttr[bdfLen] = {}; for (int i = 0; i < bdfLen; ++i) { @@ -285,7 +272,6 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) } int pd = PmuDeviceOpen(devAttr, bdfLen); - cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); diff --git a/test/test_perf/test_trace_analysis.cpp b/test/test_perf/test_trace_analysis.cpp index 7062d69..5abec52 100644 --- a/test/test_perf/test_trace_analysis.cpp +++ b/test/test_perf/test_trace_analysis.cpp @@ -93,11 +93,6 @@ TEST_F(TestAnaylzeData, collect_single_trace_data_success) { EnableTracePointer(pd, 1); int len = PmuTraceRead(pd, &data); EXPECT_TRUE(data != nullptr); - for (int i = 0; i < len; i++) { - cout << "funcName: " << data[i].funcs << " startTs: " << data[i].startTs << " elapsedTime: " << data[i].elapsedTime - << " pid: " << data[i].pid << " tid: " << data[i].tid << " cpu: " << data[i].cpu - << " comm: " << data[i].comm << endl; - } } /** @@ -118,7 +113,7 @@ TEST_F(TestAnaylzeData, collect_sleep_trace_data_success) { ASSERT_NE(pd, -1); EnableTracePointer(pd, 1); int len = PmuTraceRead(pd, &data); - 
EXPECT_TRUE(data != nullptr); + ASSERT_TRUE(data != nullptr); ASSERT_LT(data[0].elapsedTime, 0.1); } diff --git a/test/test_perf/test_trace_pointer.cpp b/test/test_perf/test_trace_pointer.cpp index 6bdb60d..430f394 100644 --- a/test/test_perf/test_trace_pointer.cpp +++ b/test/test_perf/test_trace_pointer.cpp @@ -142,8 +142,6 @@ TEST_F(TestTraceRaw, trace_pointer_net_napi) { bool l4_hash; rt = PmuGetField(rawData, "l4_hash", &l4_hash, sizeof(l4_hash)); ASSERT_EQ(rt, SUCCESS); - printf("name=%s napi_id=%d queue_mapping=%hd ip_summed=%02X l4_hash=%d ", name, napi_id, queue_mapping, - ip_summed, l4_hash); } } @@ -163,7 +161,6 @@ TEST_F(TestTraceRaw, trace_pointer_skb_copy_datagram_iovec) { unsigned int len; rt = PmuGetField(rawData, "len", &len, sizeof(len)); ASSERT_EQ(rt, SUCCESS); - printf("skbaddr=%p len=%d", skbaddr, len); } } -- Gitee From 29439ee8429c9b35ed9680ade0fd417c2d9918e4 Mon Sep 17 00:00:00 2001 From: glx Date: Fri, 20 Jun 2025 11:44:14 +0800 Subject: [PATCH 42/48] Fix doc --- docs/Details_Usage.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 7fc3ef5..51560ac 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -769,13 +769,6 @@ func main() { kperf.PmuClose(pd) } ``` - -``` - -上述代码把前四个事件设定为一个分组,groupId都设定为1,最后一个事件不分组,groupId设定为-1。 -事件数组attr.evtList和事件属性数组attr.evtAttr必须一一对应,即长度必须一致。 -或者attr.evtAttr也可以是空指针,那么所有事件都不分组。 - 事件分组的效果可以从PmuData.countPercent来体现。PmuData.countPercent表示事件实际采集时间除以事件期望采集时间。 对于同一组的事件,他们的countPercent是相同的。如果一个组的事件过多,超过了硬件计数器的数目,那么这个组的所有事件都不会被采集,countPercent会等于-1. 
-- Gitee From dbc6fb94cd340c8558eee7ef5bfce36b30ff0631 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Mon, 23 Jun 2025 17:14:24 +0800 Subject: [PATCH 43/48] =?UTF-8?q?=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E4=BF=AE=E6=94=B9,=E6=8C=87=E9=92=88=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E5=90=8E=E6=9C=AA=E8=AE=BE=E7=BD=AE=E6=88=90nullptr,=E5=A4=9A?= =?UTF-8?q?=E4=BD=99=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/sampler.cpp | 9 ++++++--- pmu/sampler.h | 2 +- pmu/spe.cpp | 9 +++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index aa23978..cb78a80 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -129,7 +129,7 @@ int KUNPENG_PMU::PerfSampler::Close() return SUCCESS; } -void KUNPENG_PMU::PerfSampler::UpdatePidInfo(const pid_t &pid, const int &tid) +void KUNPENG_PMU::PerfSampler::UpdatePidInfo(const int &tid) { auto findProc = procMap.find(tid); if (findProc == procMap.end()) { @@ -149,6 +149,9 @@ void KUNPENG_PMU::PerfSampler::UpdateCommInfo(KUNPENG_PMU::PerfEvent *event) procTopo->tid = event->comm.tid; procTopo->pid = event->comm.pid; procTopo->comm = static_cast(malloc(strlen(event->comm.comm) + 1)); + if (procTopo->comm == nullptr) { + return; + } strcpy(procTopo->comm, event->comm.comm); DBG_PRINT("Add to proc map: %d\n", event->comm.tid); procMap[event->comm.tid] = procTopo; @@ -283,7 +286,7 @@ void KUNPENG_PMU::PerfSampler::ReadRingBuffer(vector &data, vectorfork.pid, event->fork.tid); - UpdatePidInfo(event->fork.pid, event->fork.tid); + UpdatePidInfo(event->fork.tid); break; } case PERF_RECORD_COMM: { @@ -310,7 +313,7 @@ void KUNPENG_PMU::PerfSampler::FillComm(const size_t &start, const size_t &end, auto& pmuData = data[i]; auto findProc = procMap.find(pmuData.tid); if (findProc == procMap.end()) { - UpdatePidInfo(pmuData.pid, pmuData.tid); + UpdatePidInfo(pmuData.tid); findProc = procMap.find(pmuData.tid); if 
(findProc == procMap.end()) { continue; diff --git a/pmu/sampler.h b/pmu/sampler.h index 41fcdff..6c49a74 100644 --- a/pmu/sampler.h +++ b/pmu/sampler.h @@ -59,7 +59,7 @@ namespace KUNPENG_PMU { void ReadRingBuffer(std::vector &data, std::vector &sampleIps, std::vector &extPool, std::vector &switchData); void FillComm(const size_t &start, const size_t &end, std::vector &data); - void UpdatePidInfo(const pid_t &pid, const int &tid); + void UpdatePidInfo(const int &tid); void UpdateCommInfo(KUNPENG_PMU::PerfEvent *event); void ParseSwitch(KUNPENG_PMU::PerfEvent *event, struct PmuSwitchData *switchCurData); void ParseBranchSampleData(struct PmuData *pmuData, PerfRawSample *sample, union PerfEvent *event, std::vector &extPool); diff --git a/pmu/spe.cpp b/pmu/spe.cpp index 2bc42c4..d87587b 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -132,14 +132,17 @@ static void CoreSpeClose(struct SpeCoreContext *ctx, struct SpeContext *speCtx) { if (ctx->speMpage && ctx->speMpage != MAP_FAILED) { munmap(ctx->speMpage, speCtx->speMmapSize); + ctx->speMpage = nullptr; } if (ctx->auxMpage && ctx->auxMpage != MAP_FAILED) { munmap(ctx->auxMpage, speCtx->auxMmapSize); + ctx->auxMpage = nullptr; } if (ctx->dummyMpage && ctx->dummyMpage != MAP_FAILED) { munmap(ctx->dummyMpage, speCtx->dummyMmapSize); + ctx->dummyMpage = nullptr; } if (ctx->speFd > 0) { @@ -211,6 +214,7 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) if (attr->type == -1) { free(ctx); + ctx = nullptr; return LIBPERF_ERR_SPE_UNAVAIL; } @@ -225,6 +229,7 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) ctx->coreCtxes = (struct SpeCoreContext *)malloc(sizeof(struct SpeCoreContext)); if (!ctx->coreCtxes) { free(ctx); + ctx = nullptr; return COMMON_ERR_NOMEM; } ctx->coreCtxes->mask = ctx->auxMmapSize - 1; @@ -233,7 +238,9 @@ int SpeOpen(PmuEvt *attr, int cpu, SpeContext *ctx) auto err = CoreSpeOpen(&ctx->coreCtxes, ctx, attr, cpu); if (err != 0) { free(ctx->coreCtxes); + ctx->coreCtxes = nullptr; free(ctx); + ctx = 
nullptr; return err; } return SUCCESS; @@ -304,7 +311,9 @@ void SpeClose(struct SpeContext *ctx) } free(ctx->coreCtxes); + ctx->coreCtxes = nullptr; free(ctx); + ctx = nullptr; return; } -- Gitee From 101dcc2613fff5298ca2edb8112120294b0ef094 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Mon, 23 Jun 2025 19:55:16 +0800 Subject: [PATCH 44/48] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E5=86=97=E4=BD=99?= =?UTF-8?q?=E5=8F=82=E6=95=B0=E5=92=8C=E9=99=A4=E9=9B=B6=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- util/process_map.cpp | 51 -------------------------------------------- util/process_map.h | 2 -- 2 files changed, 53 deletions(-) diff --git a/util/process_map.cpp b/util/process_map.cpp index 89938b5..0745b51 100644 --- a/util/process_map.cpp +++ b/util/process_map.cpp @@ -24,58 +24,7 @@ #include "process_map.h" using namespace std; -constexpr int COMM_SIZE = 128; constexpr int PATH_LEN = 1024; -unsigned int GetNumPid() -{ - DIR *directory = opendir("/proc"); - struct dirent *entry; - unsigned int count = 0; - if (directory == nullptr) { - perror("Error opening /proc directory"); - return -1; - } - - // Count the number of process directories (pidList) - while ((entry = readdir(directory))) { - // Check if the entry is a directory and represents a process ID - if (entry->d_type == DT_DIR && atoi(entry->d_name) != 0) { - count++; - } - } - closedir(directory); - return count; -} - -int *GetAllPids(unsigned int *count) -{ - DIR *directory; - struct dirent *entry; - int *pidList = nullptr; - directory = opendir("/proc"); - - *count = GetNumPid(); - - // Allocate memory for storing pidList - if ((*count) < SIZE_MAX / sizeof(int)) { - pidList = static_cast(malloc((*count) * sizeof(int))); - } - if (pidList == nullptr) { - perror("Memory allocation error"); - closedir(directory); - return nullptr; - } - - int index = 0; - while ((entry = readdir(directory))) { - if (entry->d_type == DT_DIR && 
atoi(entry->d_name) != 0) { - pidList[index++] = atoi(entry->d_name); - } - } - - closedir(directory); - return pidList; -} void FreeProcTopo(struct ProcTopology *procTopo) { diff --git a/util/process_map.h b/util/process_map.h index 15f0b09..e401fbe 100644 --- a/util/process_map.h +++ b/util/process_map.h @@ -22,8 +22,6 @@ extern "C" { struct ProcTopology* GetProcTopology(pid_t pid); void FreeProcTopo(struct ProcTopology *procTopo); -int* GetAllPids(int* count); -unsigned int GetNumPid(); int* GetChildTid(int pid, int* numChild); #ifdef __cplusplus } -- Gitee From 49b8a4a83463047938a731a41a33815d21f980c4 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 23 Jun 2025 20:31:28 +0800 Subject: [PATCH 45/48] Revise code error: add delete, popen check and extra parameters --- pmu/pfm/pfm.cpp | 1 + pmu/pfm/uncore.cpp | 2 ++ pmu/pmu_metric.cpp | 4 ++-- pmu/spe.cpp | 8 ++++---- symbol/CMakeLists.txt | 2 +- symbol/symbol_resolve.cpp | 8 ++++++++ 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/pmu/pfm/pfm.cpp b/pmu/pfm/pfm.cpp index e516dd5..aae863b 100644 --- a/pmu/pfm/pfm.cpp +++ b/pmu/pfm/pfm.cpp @@ -173,6 +173,7 @@ struct PmuEvt* PfmGetSpeEvent( evt->collectType = collectType; int type = GetSpeType(); if (type == -1) { + delete evt; return nullptr; } evt->type = static_cast(type); diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index 6ae72cc..e73eb30 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -391,6 +391,7 @@ struct PmuEvt* GetUncoreEvent(const char* pmuName, int collectType) // Fill fields for uncore devices. auto err = FillUncoreFields(pmuName, pmuEvtPtr); if (err != SUCCESS) { + delete pmuEvtPtr; return nullptr; } return pmuEvtPtr; @@ -414,6 +415,7 @@ struct PmuEvt* GetUncoreRawEvent(const char* pmuName, int collectType) // Fill fields for uncore devices. 
auto err = FillUncoreFields(pmuName, pmuEvtPtr); if (err != SUCCESS) { + delete pmuEvtPtr; return nullptr; } return pmuEvtPtr; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 56cf758..be581c1 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1154,7 +1154,7 @@ namespace KUNPENG_PMU { return ddrcIndex; } - static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + static bool getChannelId(const char *evt, unsigned &channelId) { string devName; string evtName; @@ -1196,7 +1196,7 @@ namespace KUNPENG_PMU { unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: socketId, channelId, ddrNumaId for (auto &data : rawData) { unsigned channelId; - if (!getChannelId(data.evtName, data.ddrNumaId, channelId)) { + if (!getChannelId(data.evtName, channelId)) { continue; } auto ddrDatakey = make_tuple(data.socketId, channelId, data.ddrNumaId); diff --git a/pmu/spe.cpp b/pmu/spe.cpp index d87587b..a8cc7d9 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -487,7 +487,7 @@ static struct SpeRecord *CoreAuxData(struct SpeCoreContext *ctx, AuxContext *aux return bufEnd; } -static size_t ComputeAuxSize(size_t auxMapLen, size_t headOff, size_t oldOff, int pageSize) +static size_t ComputeAuxSize(size_t auxMapLen, size_t headOff, size_t oldOff) { // Compute current aux buffer size by current offset and previous offset. 
size_t size = 0; @@ -502,7 +502,7 @@ static size_t ComputeAuxSize(size_t auxMapLen, size_t headOff, size_t oldOff, in } static struct SpeRecord *CoreSpeData(struct SpeCoreContext *ctx, struct ContextSwitchData *dummyData, - struct SpeRecord *buf, int *remainSize, int pageSize, int cpu) + struct SpeRecord *buf, int *remainSize, int cpu) { int dummyIdx = 1; struct perf_event_mmap_page *mpage = (struct perf_event_mmap_page *)ctx->speMpage; @@ -514,7 +514,7 @@ static struct SpeRecord *CoreSpeData(struct SpeCoreContext *ctx, struct ContextS } size_t headOff = head & ctx->mask; size_t oldOff = old & ctx->mask; - size_t size = ComputeAuxSize(mpage->aux_size, headOff, oldOff, pageSize); + size_t size = ComputeAuxSize(mpage->aux_size, headOff, oldOff); size_t auxOffset = 0; struct SpeRecord *bufEnd = nullptr; @@ -554,7 +554,7 @@ int Spe::SpeReadData(struct SpeContext *context, struct SpeRecord *buf, int size int remainSize = size; int dummySize = context->dummyMmapSize; CoreDummyData(context->coreCtxes, dummyData, dummySize, context->pageSize); - CoreSpeData(context->coreCtxes, dummyData, buf, &remainSize, context->pageSize, cpu); + CoreSpeData(context->coreCtxes, dummyData, buf, &remainSize, cpu); return size - remainSize; } diff --git a/symbol/CMakeLists.txt b/symbol/CMakeLists.txt index aaa8988..33a14d2 100644 --- a/symbol/CMakeLists.txt +++ b/symbol/CMakeLists.txt @@ -8,7 +8,7 @@ set(SYMBOL_FILE_DIR ${PROJECT_TOP_DIR}/symbol) set(INCLUDE_DIR ${PROJECT_TOP_DIR}/include) set(UTIL_FILE_DIR ${PROJECT_TOP_DIR}/util) -file(GLOB SYMBOL_SRC ${SYMBOL_FILE_DIR}/*c ${SYMBOL_FILE_DIR}/*cpp ${UTIL_FILE_DIR}/pcerr.cpp) +file(GLOB SYMBOL_SRC ${SYMBOL_FILE_DIR}/*c ${SYMBOL_FILE_DIR}/*cpp ${UTIL_FILE_DIR}/pcerr.cpp ${UTIL_FILE_DIR}/common.cpp) include_directories(${UTIL_FILE_DIR}) include_directories(${SYMBOL_FILE_DIR}) diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 2f971f4..cc2d641 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -25,6 
+25,7 @@ #include "name_resolve.h" #include "pcerr.h" #include "symbol_resolve.h" +#include "common.h" using namespace KUNPENG_SYM; constexpr __u64 MAX_LINE_LENGTH = 1024; @@ -1137,6 +1138,12 @@ struct StackAsm* SymbolResolve::MapAsmCodeStack( { char startAddrStr[ADDR_LEN]; char endAddrStr[ADDR_LEN]; + + if (!ExistPath(moduleName)) { + pcerr::New(LIBSYM_ERR_FILE_INVALID, "file does not exist"); + return nullptr; + } + if (startAddr >= endAddr) { pcerr::New(LIBSYM_ERR_START_SMALLER_END, "libysm the end address must be greater than the start address"); return nullptr; @@ -1150,6 +1157,7 @@ struct StackAsm* SymbolResolve::MapAsmCodeStack( pcerr::New(LIBSYM_ERR_SNPRINF_OPERATE_FAILED, "libsym fails to execute snprintf"); return nullptr; } + std::string cmd = "objdump -Fld " + moduleName + " --start-address=" + std::string{startAddrStr} + " --stop-address=" + std::string{endAddrStr}; FILE* pipe = popen(cmd.c_str(), "r"); -- Gitee From 13673954d950803fe2ae97d70413d2a2a9becccb Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 30 Jun 2025 10:55:51 +0800 Subject: [PATCH 46/48] =?UTF-8?q?=E9=81=BF=E5=85=8D=E5=A4=9A=E7=BA=BF?= =?UTF-8?q?=E7=A8=8B=E5=A4=84=E7=90=86=E4=B8=8B=E5=A4=9A=E6=AC=A1=E9=87=8D?= =?UTF-8?q?=E5=A4=8Dclose=E5=90=8C=E4=B8=80=E4=B8=AAfd=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E5=BC=82=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- symbol/symbol_resolve.cpp | 6 +----- third_party/elfin-parser/elf/mmap_loader.cc | 10 ++++++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index cc2d641..b9f264d 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -672,13 +672,11 @@ int SymbolResolve::RecordElf(const char* fileName) } this->elfMap.emplace(file, myElf); } catch (std::exception& error) { - close(fd); pcerr::New(LIBSYM_ERR_ELFIN_FOMAT_FAILED, "libsym record elf format error: " + std::string{error.what()}); 
elfSafeHandler.releaseLock(file); return LIBSYM_ERR_ELFIN_FOMAT_FAILED; } - - close(fd); + pcerr::New(0, "success"); elfSafeHandler.releaseLock(file); return 0; @@ -720,14 +718,12 @@ int SymbolResolve::RecordDwarf(const char* fileName) efLoader.reset(); } catch (std::exception& error) { - close(fd); dwarfSafeHandler.releaseLock((file)); pcerr::New(LIBSYM_ERR_DWARF_FORMAT_FAILED, "libsym record dwarf file named " + file + " format error: " + std::string{error.what()}); return LIBSYM_ERR_DWARF_FORMAT_FAILED; } - close(fd); pcerr::New(0, "success"); dwarfSafeHandler.releaseLock((file)); return 0; diff --git a/third_party/elfin-parser/elf/mmap_loader.cc b/third_party/elfin-parser/elf/mmap_loader.cc index 875d7bd..da73fb2 100644 --- a/third_party/elfin-parser/elf/mmap_loader.cc +++ b/third_party/elfin-parser/elf/mmap_loader.cc @@ -26,15 +26,21 @@ public: mmap_loader(int fd) { off_t end = lseek(fd, 0, SEEK_END); - if (end == (off_t)-1) + if (end == (off_t)-1) { + close(fd); throw system_error(errno, system_category(), "finding file length"); + } + lim = end; base = mmap(nullptr, lim, PROT_READ, MAP_SHARED, fd, 0); - if (base == MAP_FAILED) + if (base == MAP_FAILED) { + close(fd); throw system_error(errno, system_category(), "mmap'ing file"); + } + close(fd); } -- Gitee From fbde40ecc7a69cef2ae65511a7423cf552996b6a Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Wed, 28 May 2025 15:01:46 +0800 Subject: [PATCH 47/48] =?UTF-8?q?PMU=5FL3=5FLAT=E7=9A=84=E7=BB=93=E6=9E=9C?= =?UTF-8?q?=E4=BD=BF=E7=94=A8ns=E4=BB=A3=E6=9B=BFcycles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Details_Usage.md | 14 +++++++------- include/pmu.h | 2 +- pmu/pmu_metric.cpp | 16 +++++++++++++++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 51560ac..3fe61ee 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -931,7 +931,7 @@ auto len = 
PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); // devData的长度等于cluster个数 for (int i=0;i L3_CLOCK_NS {{CHIP_TYPE::HIPB, 0.3448275862}}; + const UNCORE_METRIC_MAP UNCORE_METRIC_CONFIG_MAP = { {CHIP_TYPE::HIPA, HIP_A_UNCORE_METRIC_MAP}, {CHIP_TYPE::HIPB, HIP_B_UNCORE_METRIC_MAP}, @@ -958,6 +960,17 @@ namespace KUNPENG_PMU { return 64 * rawCount; } + static uint64_t L3Lat(const uint64_t rawCount) + { + const CHIP_TYPE chipType = GetCpuType(); + auto iter = L3_CLOCK_NS.find(chipType); + uint64_t count = rawCount; + if (iter != L3_CLOCK_NS.end()) { + count = rawCount * iter->second; + } + return count; + } + static PmuMetricMode GetMetricMode(const PmuDeviceMetric &metric) { switch(metric) { @@ -1308,7 +1321,8 @@ namespace KUNPENG_PMU { map computeMetricMap = {{PMU_DDR_READ_BW, DDRBw}, {PMU_DDR_WRITE_BW, DDRBw}, - {PMU_L3_TRAFFIC, L3Bw}}; + {PMU_L3_TRAFFIC, L3Bw}, + {PMU_L3_LAT, L3Lat}}; map aggregateMap = { {PMU_DDR_READ_BW, AggregateByChannel}, {PMU_DDR_WRITE_BW, AggregateByChannel}, -- Gitee From 32c477b53a68c26ffca95e4e6d0cb3bc2a90beb6 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 2 Jul 2025 10:04:20 +0800 Subject: [PATCH 48/48] =?UTF-8?q?PMU=5FL3=5FLAT=E7=9A=84=E7=BB=93=E6=9E=9C?= =?UTF-8?q?=E4=BD=BF=E7=94=A8ns=E4=BB=A3=E6=9B=BFcycles=E6=B3=A8=E9=87=8A?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Details_Usage.md | 2 +- pmu/pmu_metric.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 3fe61ee..d2bfa43 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -930,7 +930,7 @@ PmuDeviceData *devData = nullptr; auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); // devData的长度等于cluster个数 for (int i=0;i