From d1eaa231d1d7551c697713480d97418977f40997 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 11 Apr 2025 16:47:33 +0800 Subject: [PATCH 01/35] =?UTF-8?q?=E9=80=82=E9=85=8Dx86,=E4=BB=85=E6=94=AF?= =?UTF-8?q?=E6=8C=81count=E5=92=8Csampling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/pcerrc.h | 1 + pmu/evt.cpp | 7 ++ pmu/pfm/core.cpp | 139 ++++++++++++++++++++++++++++++++- pmu/pfm/pfm.cpp | 21 ++--- pmu/pfm/pfm_name.cpp | 9 +++ pmu/pfm/pfm_name.h | 9 +++ pmu/pfm/trace.cpp | 1 - pmu/pfm/uncore.cpp | 57 ++++++++++++-- pmu/pmu.cpp | 32 ++++++-- pmu/pmu_event.h | 2 +- pmu/pmu_event_list.cpp | 15 ++++ pmu/pmu_metric.cpp | 20 +++++ pmu/pmu_trace_analysis.cpp | 16 ++++ pmu/sample_process.cpp | 11 +++ pmu/sampler.cpp | 6 ++ pmu/spe.h | 6 ++ python/modules/kperf/perror.py | 1 + util/common.h | 8 ++ util/cpu_map.cpp | 4 + util/cpu_map.h | 1 + util/pcerr.cpp | 1 + 21 files changed, 341 insertions(+), 26 deletions(-) diff --git a/include/pcerrc.h b/include/pcerrc.h index 55a2f9e..3a71491 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -106,6 +106,7 @@ extern "C" { #define LIBPERF_ERR_CPUFREQ_NOT_CONFIG 1062 #define LIBPERF_ERR_CLUSTER_ID_OVERSIZE 1063 #define LIBPERF_ERR_INVALID_PMU_BDF_TYPE 1064 +#define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 #define UNKNOWN_ERROR 9999 diff --git a/pmu/evt.cpp b/pmu/evt.cpp index db27e0c..4d3b934 100644 --- a/pmu/evt.cpp +++ b/pmu/evt.cpp @@ -113,10 +113,17 @@ __u64 KUNPENG_PMU::ReadOnce(__u64 *head) : "memory"); break; case HEAD_SIZE::HEAD_SIZE_EIGHT: +#ifdef IS_X86 + asm volatile("mov %0, %1" + : "=r"(*(__u64 __attribute__((__may_alias__)) *)pointerUnion.charHead) + : "Q"(*head) + : "memory"); +#else asm volatile("ldar %0, %1" : "=r"(*(__u64 __attribute__((__may_alias__)) *)pointerUnion.charHead) : "Q"(*head) : "memory"); +#endif break; default: break; diff --git a/pmu/pfm/core.cpp b/pmu/pfm/core.cpp index 35fd1a7..32cd3a9 100644 --- 
a/pmu/pfm/core.cpp +++ b/pmu/pfm/core.cpp @@ -255,6 +255,33 @@ namespace HARDWARE_EVENT { KUNPENG_PMU::COMMON::BUS_CYCLES } }; + + PMU_PAIR REF_CYCLES = { + KUNPENG_PMU::COMMON::REF_CYCLES, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_REF_CPU_CYCLES, + KUNPENG_PMU::COMMON::REF_CYCLES + } + }; + + PMU_PAIR BRANCHES = { + KUNPENG_PMU::COMMON::BRANCHES, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + KUNPENG_PMU::COMMON::BRANCHES + } + }; + + PMU_PAIR BRANCH_INSTRUCTIONS = { + KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS, + { + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS + } + }; } // namespace hardware event namespace HW_CACHE_EVENT { @@ -312,6 +339,26 @@ namespace HW_CACHE_EVENT { } }; + + PMU_PAIR LLC_STORE_MISSES = { + KUNPENG_PMU::COMMON::LLC_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10102, + KUNPENG_PMU::COMMON::LLC_STORE_MISSES + } + }; + + PMU_PAIR LLC_STORES = { + KUNPENG_PMU::COMMON::LLC_STORES, + { + PERF_TYPE_HW_CACHE, + 0x102, + KUNPENG_PMU::COMMON::LLC_STORES + } + }; + + PMU_PAIR BRANCH_LOAD_MISSES = { KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES, { @@ -366,6 +413,42 @@ namespace HW_CACHE_EVENT { } }; + PMU_PAIR NODE_LOAD_MISSES = { + KUNPENG_PMU::COMMON::NODE_LOAD_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10006, + KUNPENG_PMU::COMMON::NODE_LOAD_MISSES + } + }; + + PMU_PAIR NODE_LOADS = { + KUNPENG_PMU::COMMON::NODE_LOADS, + { + PERF_TYPE_HW_CACHE, + 0x6, + KUNPENG_PMU::COMMON::NODE_LOADS + } + }; + + PMU_PAIR NODE_STORE_MISSES = { + KUNPENG_PMU::COMMON::NODE_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10106, + KUNPENG_PMU::COMMON::NODE_STORE_MISSES + } + }; + + PMU_PAIR NODE_STORES = { + KUNPENG_PMU::COMMON::NODE_STORES, + { + PERF_TYPE_HW_CACHE, + 0x106, + KUNPENG_PMU::COMMON::NODE_STORES + } + }; + PMU_PAIR L1_DCACHE_STORE_MISSES = { KUNPENG_PMU::COMMON::L1_DCACHE_STORE_MISSES, { @@ -872,12 +955,58 @@ const std::unordered_map HIP_E_CORE_PMU_MA SOFTWARE_EVENT::TASK_CLOCK, }; +const 
std::unordered_map HIP_X86_CORE_PMU_MAP{ + HARDWARE_EVENT::BRANCH_MISSES, + HARDWARE_EVENT::CACHE_MISSES, + HARDWARE_EVENT::CACHE_REFERENCES, + HARDWARE_EVENT::CPU_CYCLES, + HARDWARE_EVENT::CYCLES, + HARDWARE_EVENT::INSTRUCTIONS, + HARDWARE_EVENT::BUS_CYCLES, + HARDWARE_EVENT::REF_CYCLES, + HARDWARE_EVENT::BRANCH_INSTRUCTIONS, + HARDWARE_EVENT::BRANCHES, + + SOFTWARE_EVENT::ALIGNMENT_FAULTS, + SOFTWARE_EVENT::BPF_OUTPUT, + SOFTWARE_EVENT::CONTEXT_SWITCHES, + SOFTWARE_EVENT::CS, + SOFTWARE_EVENT::CPU_CLOCK, + SOFTWARE_EVENT::CPU_MIGRATIONS, + SOFTWARE_EVENT::MIGRATIONS, + SOFTWARE_EVENT::DUMMY, + SOFTWARE_EVENT::EMULATION_FAULTS, + SOFTWARE_EVENT::MAJOR_FAULTS, + SOFTWARE_EVENT::MINOR_FAULTS, + SOFTWARE_EVENT::PAGE_FAULTS, + SOFTWARE_EVENT::FAULTS, + SOFTWARE_EVENT::TASK_CLOCK, + + HW_CACHE_EVENT::L1_DCACHE_LOAD_MISSES, + HW_CACHE_EVENT::L1_DCACHE_LOADS, + HW_CACHE_EVENT::L1_ICACHE_LOAD_MISSES, + HW_CACHE_EVENT::L1_ICACHE_LOADS, + HW_CACHE_EVENT::LLC_LOAD_MISSES, + HW_CACHE_EVENT::LLC_LOADS, + HW_CACHE_EVENT::LLC_STORE_MISSES, + HW_CACHE_EVENT::LLC_STORES, + HW_CACHE_EVENT::BRANCH_LOAD_MISSES, + HW_CACHE_EVENT::BRANCH_LOADS, + HW_CACHE_EVENT::DTLB_LOAD_MISSES, + HW_CACHE_EVENT::DTLB_LOADS, + HW_CACHE_EVENT::NODE_LOAD_MISSES, + HW_CACHE_EVENT::NODE_LOADS, + HW_CACHE_EVENT::NODE_STORE_MISSES, + HW_CACHE_EVENT::NODE_STORES, +}; + const KUNPENG_PMU::CORE_EVT_MAP KUNPENG_PMU::CORE_EVENT_MAP = { {CHIP_TYPE::HIPA, HIP_A_CORE_PMU_MAP}, {CHIP_TYPE::HIPB, HIP_B_CORE_PMU_MAP}, {CHIP_TYPE::HIPC, HIP_C_CORE_PMU_MAP}, {CHIP_TYPE::HIPF, HIP_F_CORE_PMU_MAP}, {CHIP_TYPE::HIPE, HIP_E_CORE_PMU_MAP}, + {CHIP_TYPE::HIPX86, HIP_X86_CORE_PMU_MAP}, }; static struct PmuEvt* ConstructPmuEvtFromCore(KUNPENG_PMU::CoreConfig config, int collectType) @@ -888,7 +1017,6 @@ static struct PmuEvt* ConstructPmuEvtFromCore(KUNPENG_PMU::CoreConfig config, in pmuEvtPtr->type = config.type; pmuEvtPtr->pmuType = KUNPENG_PMU::CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return 
pmuEvtPtr; } @@ -951,7 +1079,6 @@ static struct PmuEvt* ConstructPmuEvtFromKernel(const char* pmuName, int collect pmuEvtPtr->type = type; pmuEvtPtr->pmuType = KUNPENG_PMU::CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -981,6 +1108,13 @@ std::string GetPmuDevicePath() } struct dirent *dent; while (dent = readdir(dir)) { +#ifdef IS_X86 + // look for devices like /sys/bus/event_source/devices/cpu/events + if (strcmp(dent->d_name, "cpu") == 0) { + pmuDevice = DEVICE_PATH + dent->d_name; + break; + } +#else if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..") || !strcmp(dent->d_name, "cpu")) { continue; } @@ -992,6 +1126,7 @@ std::string GetPmuDevicePath() pmuDevice = DEVICE_PATH + dent->d_name; break; } +#endif } closedir(dir); return pmuDevice; diff --git a/pmu/pfm/pfm.cpp b/pmu/pfm/pfm.cpp index 7d4aa7f..e516dd5 100644 --- a/pmu/pfm/pfm.cpp +++ b/pmu/pfm/pfm.cpp @@ -54,7 +54,6 @@ static struct PmuEvt* GetRawEvent(const char* pmuName, int collectType) pmuEvtPtr->type = PERF_TYPE_RAW; pmuEvtPtr->pmuType = CORE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } @@ -120,26 +119,31 @@ static bool CheckRawEvent(const char *pmuName) static int GetEventType(const char *pmuName) { - if (pmuName[0] == 'r' && CheckRawEvent(pmuName)) { - return RAW_TYPE; - } if (CheckEventInList(CORE_EVENT, pmuName)) { return CORE_TYPE; } - std::string strName(pmuName); - // Kernel trace point event name like 'block:block_bio_complete' - if (CheckEventInList(TRACE_EVENT, pmuName)) { - return TRACE_TYPE; + + if (pmuName[0] == 'r' && CheckRawEvent(pmuName)) { + return RAW_TYPE; } + std::string strName(pmuName); // Parse uncore event name like 'hisi_sccl3_ddrc0/flux_rd/' if (CheckEventInList(UNCORE_EVENT, pmuName)) { return UNCORE_TYPE; } +#ifdef IS_X86 + return -1; +#else + // Kernel trace point event name like 'block:block_bio_complete' + if (CheckEventInList(TRACE_EVENT, pmuName)) { + return 
TRACE_TYPE; + } // Parse uncore event raw name like 'hisi_sccl3_ddrc0/config=0x0/' // or smmuv3_pmcg_100020/transaction,filter_enable=1,filter_stream_id=0x7d/ if (CheckUncoreRawEvent(pmuName)) { return UNCORE_RAW_TYPE; } +#endif return -1; } @@ -175,7 +179,6 @@ struct PmuEvt* PfmGetSpeEvent( evt->config = dataFilter; evt->config1 = eventFilter; evt->config2 = minLatency; - evt->cpumask = -1; return evt; } diff --git a/pmu/pfm/pfm_name.cpp b/pmu/pfm/pfm_name.cpp index 0ef9a90..7baa0f6 100644 --- a/pmu/pfm/pfm_name.cpp +++ b/pmu/pfm/pfm_name.cpp @@ -24,6 +24,9 @@ const char* KUNPENG_PMU::COMMON::BRANCH_MISSES = "branch-misses"; const char* KUNPENG_PMU::COMMON::BUS_CYCLES = "bus-cycles"; const char* KUNPENG_PMU::COMMON::CACHE_MISSES = "cache-misses"; +const char* KUNPENG_PMU::COMMON::REF_CYCLES = "ref-cycles"; +const char* KUNPENG_PMU::COMMON::BRANCHES = "branches"; +const char* KUNPENG_PMU::COMMON::BRANCH_INSTRUCTIONS = "branch-instructions"; const char* KUNPENG_PMU::COMMON::CACHE_REFERENCES = "cache-references"; const char* KUNPENG_PMU::COMMON::CPU_CYCLES = "cpu-cycles"; const char* KUNPENG_PMU::COMMON::CYCLES = "cycles"; @@ -40,12 +43,18 @@ const char* KUNPENG_PMU::COMMON::IDLE_CYCLES_FRONTEND = "idle-cycles-frontend"; const char* KUNPENG_PMU::COMMON::L1_ICACHE_LOADS = "L1-icache-loads"; const char* KUNPENG_PMU::COMMON::LLC_LOAD_MISSES = "LLC-load-misses"; const char* KUNPENG_PMU::COMMON::LLC_LOADS = "LLC-loads"; +const char* KUNPENG_PMU::COMMON::LLC_STORE_MISSES = "LLC-store-misses"; +const char* KUNPENG_PMU::COMMON::LLC_STORES = "LLC-stores"; const char* KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES = "branch-load-misses"; const char* KUNPENG_PMU::COMMON::BRANCH_LOADS = "branch-loads"; const char* KUNPENG_PMU::COMMON::DTLB_LOAD_MISSES = "dTLB-load-misses"; const char* KUNPENG_PMU::COMMON::DTLB_LOADS = "dTLB-loads"; const char* KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES = "iTLB-load-misses"; const char* KUNPENG_PMU::COMMON::ITLB_LOADS = "iTLB-loads"; +const char* 
KUNPENG_PMU::COMMON::NODE_LOAD_MISSES = "node-load-misses"; +const char* KUNPENG_PMU::COMMON::NODE_LOADS = "node-loads"; +const char* KUNPENG_PMU::COMMON::NODE_STORE_MISSES = "node-store-misses"; +const char* KUNPENG_PMU::COMMON::NODE_STORES = "node-stores"; const char* KUNPENG_PMU::COMMON::ALIGNMENT_FAULTS = "alignment-faults"; const char* KUNPENG_PMU::COMMON::BPF_OUTPUT = "bpf-output"; const char* KUNPENG_PMU::COMMON::CONTEXT_SWITCHES = "context-switches"; diff --git a/pmu/pfm/pfm_name.h b/pmu/pfm/pfm_name.h index ec528d5..c5e5905 100644 --- a/pmu/pfm/pfm_name.h +++ b/pmu/pfm/pfm_name.h @@ -25,6 +25,9 @@ extern const char* BRANCH_MISSES; extern const char* BUS_CYCLES; extern const char* CACHE_MISSES; extern const char* CACHE_REFERENCES; +extern const char* REF_CYCLES; +extern const char* BRANCHES; +extern const char* BRANCH_INSTRUCTIONS; extern const char* CPU_CYCLES; extern const char* CYCLES; extern const char* INSTRUCTIONS; @@ -40,12 +43,18 @@ extern const char* IDLE_CYCLES_FRONTEND; extern const char* L1_ICACHE_LOADS; extern const char* LLC_LOAD_MISSES; extern const char* LLC_LOADS; +extern const char* LLC_STORE_MISSES; +extern const char* LLC_STORES; extern const char* BRANCH_LOAD_MISSES; extern const char* BRANCH_LOADS; extern const char* DTLB_LOAD_MISSES; extern const char* DTLB_LOADS; extern const char* ITLB_LOAD_MISSES; extern const char* ITLB_LOADS; +extern const char* NODE_LOAD_MISSES; +extern const char* NODE_LOADS; +extern const char* NODE_STORE_MISSES; +extern const char* NODE_STORES; // Software event extern const char* ALIGNMENT_FAULTS; extern const char* BPF_OUTPUT; diff --git a/pmu/pfm/trace.cpp b/pmu/pfm/trace.cpp index a435635..54c5bec 100644 --- a/pmu/pfm/trace.cpp +++ b/pmu/pfm/trace.cpp @@ -58,6 +58,5 @@ struct PmuEvt* GetKernelTraceEvent(const char* pmuName, int collectType) pmuEvtPtr->type = PERF_TYPE_TRACEPOINT; pmuEvtPtr->pmuType = TRACE_TYPE; pmuEvtPtr->collectType = collectType; - pmuEvtPtr->cpumask = -1; return pmuEvtPtr; } diff 
--git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index a3e655b..e6fca1a 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -44,23 +44,49 @@ static int GetDeviceType(const string &devName) return stoi(typeStr); } -static int GetCpuMask(const string &devName) +static std::vector GetCpuMask(const string &devName) { + std::vector maskList; string maskPath = "/sys/devices/" + devName + "/cpumask"; std::string realPath = GetRealPath(maskPath); if (!IsValidPath(realPath)) { - return -1; + return maskList; } ifstream maskIn(realPath); if (!maskIn.is_open()) { - return -1; + return maskList; } // Cpumask is a comma-separated list of integers, // but now make it simple for ddrc event. - string maskStr; + char maskStr[1024]; maskIn >> maskStr; - return stoi(maskStr); + char *tokStr = strtok(maskStr, ","); + while (tokStr != nullptr) { + if (strstr(tokStr, "-") != nullptr) { + int minCpu, maxCpu; + if (sscanf(tokStr, "%d-%d", &minCpu, &maxCpu) != 2) { + continue; + } + for (int i = minCpu; i <= maxCpu; i++) { + maskList.push_back(i); + } + } else { + int aloneNumber; + if (sscanf(tokStr, "%d", &aloneNumber) == 1) { + maskList.push_back(aloneNumber); + } + } + tokStr = strtok(nullptr, ","); + } + return maskList; +} + +static int64_t TransferStrToHex(const std::string& str) { + int64_t intData; + std::istringstream iss(str); + iss >> std::hex >> intData; + return intData; } static int64_t GetUncoreEventConfig(const char* pmuName) @@ -85,10 +111,25 @@ static int64_t GetUncoreEventConfig(const char* pmuName) if (findEq == string::npos) { return -1; } + +#ifdef IS_X86 + auto umaskEq = configStr.find("umask"); + if (umaskEq != string::npos) { + auto CommaEq = configStr.find(","); + if (CommaEq == string::npos) { + return -1; + } + auto lowStr = configStr.substr(findEq + 1, CommaEq - findEq); + int64_t low = TransferStrToHex(lowStr); + auto highStr = configStr.substr(umaskEq + 6, configStr.size() - umaskEq - 6); + int64_t high = TransferStrToHex(highStr); + config = (high << 
8) + low; + return config; + } +#endif auto subStr = configStr.substr(findEq + 1, configStr.size() - findEq); std::istringstream iss(subStr); iss >> std::hex >> config; - return config; } @@ -103,8 +144,8 @@ int FillUncoreFields(const char* pmuName, PmuEvt *evt) return UNKNOWN_ERROR; } evt->type = devType; - int cpuMask = GetCpuMask(devName); - evt->cpumask = cpuMask; + std::vector cpuMaskList = GetCpuMask(devName); + evt->cpuMaskList = cpuMaskList; evt->name = pmuName; return SUCCESS; } diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 44414ca..c033dfc 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -174,6 +174,12 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * New(LIBPERF_ERR_INVALID_TASK_TYPE); return LIBPERF_ERR_INVALID_TASK_TYPE; } +#ifdef IS_X86 + if (collectType != COUNTING && collectType != SAMPLING) { + New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUTING mode and SMAPLING mode"); + return LIBPERF_ERR_INVALID_TASK_TYPE; + } +#endif if ((collectType == COUNTING) && attr->evtList == nullptr) { New(LIBPERF_ERR_INVALID_EVTLIST, "Counting mode requires a non-null event list."); return LIBPERF_ERR_INVALID_EVTLIST; @@ -205,7 +211,6 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * New(LIBPERF_ERR_INVALID_GROUP_SPE); return LIBPERF_ERR_INVALID_GROUP_SPE; } - return SUCCESS; } @@ -789,10 +794,12 @@ static struct PmuEvt* GetPmuEvent(const char* pmuName, int collectType) static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt) { - if (pmuEvt->cpumask >= 0) { - taskParam->numCpu = 1; - taskParam->cpuList = new int[1]; - taskParam->cpuList[0] = pmuEvt->cpumask; + if (!pmuEvt->cpuMaskList.empty()) { + taskParam->numCpu = pmuEvt->cpuMaskList.size(); + taskParam->cpuList = new int[pmuEvt->cpuMaskList.size()]; + for(int i = 0; i < pmuEvt->cpuMaskList.size(); i++) { + taskParam->cpuList[i] = pmuEvt->cpuMaskList[i]; + } } else if (attr->cpuList == nullptr && 
attr->pidList != nullptr && pmuEvt->collectType == COUNTING) { // For counting with pid list for system wide, open fd with cpu -1 and specific pid. taskParam->numCpu = 1; @@ -839,7 +846,11 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } else { pmuEvt = GetPmuEvent(evtName, collectType); if (pmuEvt == nullptr) { +#ifdef IS_X86 + New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName) + ";x86 just supports core event and raw event"); +#else New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName)); +#endif return nullptr; } } @@ -953,22 +964,33 @@ int PmuDumpData(struct PmuData *pmuData, unsigned len, char *filepath, int dumpD } int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *value, uint32_t vSize) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86; +#else if (rawData == nullptr) { New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return LIBPERF_ERR_INVALID_FIELD_ARGS; } return PointerPasser::ParsePointer(rawData->data, fieldName, value, vSize); +#endif } struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return nullptr; +#else if (rawData == nullptr) { New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return nullptr; } + SampleRawField *rt = PointerPasser::GetSampleRawField(rawData->data, fieldName); if (rt) { New(SUCCESS); } return rt; +#endif } diff --git a/pmu/pmu_event.h b/pmu/pmu_event.h index 8455377..6fc6a72 100644 --- a/pmu/pmu_event.h +++ b/pmu/pmu_event.h @@ -38,7 +38,7 @@ struct PmuEvt { int pmuType; // if pmu is CORE/UNCORE/SPE and etc (to be implemented) int collectType; std::string name; // string name of this pmu event - int cpumask; // a representative CPU number for each socket (package) in the motherboard. 
+ std::vector cpuMaskList; // representative CPU number list for each socket (package) in the motherboard. unsigned excludeUser : 1; // don't count user unsigned excludeKernel : 1; // don't count kernel unsigned callStack : 1; // collect complete call stack diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index c85bc52..3a3bc0d 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -37,7 +37,11 @@ static const string EVENT_DIR = "/events/"; static std::mutex pmuEventListMtx; +#ifdef IS_X86 +static vector supportDevPrefixs = {"uncore_iio", "uncore_imc"}; +#else static vector supportDevPrefixs = {"hisi", "smmuv3", "hns3", "armv8"}; +#endif static vector uncoreEventList; static vector traceEventList; @@ -57,6 +61,12 @@ static void GetEventName(const string& devName, vector& eventList) continue; } string fileName(entry->d_name); +#ifdef IS_X86 + // Included in x86 .scale .unit files not for events + if (fileName.find('.') != string::npos) { + continue; + } +#endif auto eventName = devName; eventName += SLASH + fileName; eventName += SLASH; @@ -182,6 +192,10 @@ const char** QueryUncoreEvent(unsigned *numEvt) const char** QueryTraceEvent(unsigned *numEvt) { +#ifdef IS_X86 + *numEvt = 0; + return nullptr; +#else if (!traceEventList.empty()) { *numEvt = traceEventList.size(); return traceEventList.data(); @@ -207,6 +221,7 @@ const char** QueryTraceEvent(unsigned *numEvt) closedir(dir); *numEvt = traceEventList.size(); return traceEventList.data(); +#endif } const char** QueryAllEvent(unsigned *numEvt) { diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 6dffb3b..d645917 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1263,6 +1263,10 @@ using namespace KUNPENG_PMU; const char** PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return nullptr; +#else try { lock_guard lg(pmuBdfListMtx); SetWarn(SUCCESS); @@ -1294,6 +1298,7 @@ const char** 
PmuDeviceBdfList(enum PmuBdfType bdfType, unsigned *numBdf) New(UNKNOWN_ERROR, ex.what()); return nullptr; } +#endif } static void PmuBdfListFreeSingle(vector &bdfList) @@ -1316,6 +1321,10 @@ void PmuDeviceBdfListFree() int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); try { if (CheckPmuDeviceAttr(attr, len) != SUCCESS) { @@ -1349,6 +1358,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } static int CheckPmuDeviceVar(struct PmuData *pmuData, unsigned len, @@ -1376,6 +1386,10 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, struct PmuDeviceAttr *attr, unsigned attrLen, struct PmuDeviceData **data) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); try { if (CheckPmuDeviceVar(pmuData, len, attr, attrLen) != SUCCESS) { @@ -1428,6 +1442,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } void DevDataFree(struct PmuDeviceData *data) @@ -1474,6 +1489,10 @@ static void InitializeCoreArray() int PmuGetClusterCore(unsigned clusterId, unsigned **coreList) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else try { lock_guard lg(pmuCoreListMtx); InitializeCoreArray(); @@ -1505,6 +1524,7 @@ int PmuGetClusterCore(unsigned clusterId, unsigned **coreList) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } int PmuGetNumaCore(unsigned nodeId, unsigned **coreList) diff --git a/pmu/pmu_trace_analysis.cpp b/pmu/pmu_trace_analysis.cpp index 7dbf4da..c1c6834 100644 --- a/pmu/pmu_trace_analysis.cpp +++ b/pmu/pmu_trace_analysis.cpp @@ -29,6 +29,11 @@ static vector SysCallFuncList; const char** PmuSysCallFuncList(unsigned *numFuncs) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + *numFuncs = 0; + return nullptr; +#else lock_guard 
lg(SysCallListMtx); SetWarn(SUCCESS); try { @@ -57,6 +62,7 @@ const char** PmuSysCallFuncList(unsigned *numFuncs) New(SUCCESS); *numFuncs = SysCallFuncList.size(); return SysCallFuncList.data(); +#endif } void PmuSysCallFuncListFree() @@ -172,6 +178,10 @@ static char **GeneratePmuAttrEvtList(const char **sysCallFuncs, const unsigned n int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else SetWarn(SUCCESS); auto err = CheckTraceAttr(traceType, traceAttr); if (err != SUCCESS) { @@ -199,6 +209,7 @@ int PmuTraceOpen(enum PmuTraceType traceType, struct PmuTraceAttr *traceAttr) } return pd; +#endif } int PmuTraceEnable(int pd) @@ -213,6 +224,10 @@ int PmuTraceDisable(int pd) int PmuTraceRead(int pd, struct PmuTraceData **pmuTraceData) { +#ifdef IS_X86 + New(LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86); + return -1; +#else PmuData *pmuData = nullptr; unsigned len = PmuRead(pd, &pmuData); if (len == -1) { @@ -246,6 +261,7 @@ int PmuTraceRead(int pd, struct PmuTraceData **pmuTraceData) New(UNKNOWN_ERROR, ex.what()); return -1; } +#endif } void PmuTraceClose(int pd) diff --git a/pmu/sample_process.cpp b/pmu/sample_process.cpp index fcd1bfa..778b622 100644 --- a/pmu/sample_process.cpp +++ b/pmu/sample_process.cpp @@ -22,6 +22,16 @@ #define PAGE_SIZE (sysconf(_SC_PAGESIZE)) #define MB() asm volatile("dmb ish" ::: "memory") static constexpr int MAX_DATA_SIZE = 8192; +#ifdef IS_X86 +#define PerfRingbufferSmpStoreRelease(p, v) \ + ({ \ + union { \ + typeof(*p) val; \ + char charHead[1]; \ + } pointerUnion = {.val = (v)}; \ + asm volatile("mov %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ + }) +#else #define PerfRingbufferSmpStoreRelease(p, v) \ ({ \ union { \ @@ -30,6 +40,7 @@ static constexpr int MAX_DATA_SIZE = 8192; } pointerUnion = {.val = (v)}; \ asm volatile("stlr %1, %0" : "=Q"(*p) : "r"(*(__u64 *)pointerUnion.charHead) : "memory"); \ }) +#endif 
void KUNPENG_PMU::PerfMmapConsume(PerfMmap &map) { diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 57682fb..6cfd684 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -55,7 +55,13 @@ int KUNPENG_PMU::PerfSampler::MapPerfAttr(const bool groupEnable, const int grou attr.read_format = PERF_FORMAT_ID; attr.exclude_kernel = this->evt->excludeKernel; attr.exclude_user = this->evt->excludeUser; +#ifdef IS_X86 + if (this->pid == -1) { + attr.pinned = 0; + } +#else attr.pinned = 1; +#endif attr.disabled = 1; attr.inherit = 1; attr.mmap = 1; diff --git a/pmu/spe.h b/pmu/spe.h index aa4ba0e..f1fe64c 100644 --- a/pmu/spe.h +++ b/pmu/spe.h @@ -28,9 +28,15 @@ #include "pmu_event.h" #include "symbol.h" +#ifdef IS_X86 +#define MB() asm volatile("mfence":::"memory") +#define RMB() asm volatile("lfence":::"memory") +#define WMB() asm volatile("sfence":::"memory") +#else #define MB() asm volatile("dsb sy") #define RMB() asm volatile("dsb ld") #define WMB() asm volatile("dsb st") +#endif #define EVENT_EXCEPTION_GEN 0x1 #define EVENT_RETIRED 0x2 diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 5183196..f8dcb9c 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -105,6 +105,7 @@ class Error: LIBPERF_ERR_CPUFREQ_NOT_CONFIG = 1062 LIBPERF_ERR_CLUSTER_ID_OVERSIZE = 1063 LIBPERF_ERR_INVALID_PMU_BDF_TYPE = 1064 + LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 UNKNOWN_ERROR = 9999 diff --git a/util/common.h b/util/common.h index 4ffd0c8..f634e4e 100644 --- a/util/common.h +++ b/util/common.h @@ -19,6 +19,14 @@ #include #include +#ifdef __x86_64__ +#define IS_X86 1 +#elif defined(__aarch64__) +#define IS_ARM 1 +#else +#error "Only the x86_64 and aarch64 architecture are supported." 
+#endif + const std::string TRACE_EVENT_PATH = "/sys/kernel/tracing/events/"; const std::string TRACE_DEBUG_EVENT_PATH = "/sys/kernel/debug/tracing/events/"; diff --git a/util/cpu_map.cpp b/util/cpu_map.cpp index c8e0c9e..4c1577d 100644 --- a/util/cpu_map.cpp +++ b/util/cpu_map.cpp @@ -99,10 +99,14 @@ bool InitCpuType() CHIP_TYPE GetCpuType() { +#ifdef IS_X86 + return HIPX86; +#else if (g_chipType == UNDEFINED_TYPE && !InitCpuType()) { return UNDEFINED_TYPE; } return g_chipType; +#endif } set GetOnLineCpuIds() diff --git a/util/cpu_map.h b/util/cpu_map.h index 6279954..df16320 100644 --- a/util/cpu_map.h +++ b/util/cpu_map.h @@ -27,6 +27,7 @@ enum CHIP_TYPE { HIPC = 3, HIPF = 4, HIPE = 5, + HIPX86 = 6, }; struct CpuTopology* GetCpuTopology(int coreId); diff --git a/util/pcerr.cpp b/util/pcerr.cpp index ab1c083..7d70a3e 100644 --- a/util/pcerr.cpp +++ b/util/pcerr.cpp @@ -53,6 +53,7 @@ namespace pcerr { {LIBPERF_ERR_BRANCH_JUST_SUPPORT_SAMPLING, "branch filter just support sampling mode"}, {LIBPERF_ERR_RESET_FD, "failed to reset fd output"}, {LIBPERF_ERR_SET_FD_RDONLY_NONBLOCK, "failed to set fd readonly and nonbolock"}, + {LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86, "the current interface does not support x86"}, }; static std::unordered_map warnMsgs = { {LIBPERF_WARN_CTXID_LOST, "Some SPE context packets are not found in the traces."}, -- Gitee From 19505ed1cc489a229671735720aa46b9f2ac3e6b Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Sat, 19 Apr 2025 10:41:48 +0800 Subject: [PATCH 02/35] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=81=97=E6=BC=8F?= =?UTF-8?q?=E4=BA=8B=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/perf_counter.cpp | 7 ++++++- pmu/pfm/core.cpp | 22 ++++++++++++++++++++++ pmu/pfm/pfm_name.cpp | 2 ++ pmu/pfm/pfm_name.h | 2 ++ pmu/pmu_event_list.cpp | 2 +- util/common.cpp | 7 +++++++ util/common.h | 1 + 7 files changed, 41 insertions(+), 2 deletions(-) diff --git a/pmu/perf_counter.cpp 
b/pmu/perf_counter.cpp index a6417f7..bf801b0 100644 --- a/pmu/perf_counter.cpp +++ b/pmu/perf_counter.cpp @@ -126,8 +126,13 @@ int KUNPENG_PMU::PerfCounter::MapPerfAttr(const bool groupEnable, const int grou attr.disabled = 0; this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } else { - if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE) { +#ifdef IS_X86 + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "cpu/")) { this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#else + if (this->evt->pmuType == KUNPENG_PMU::UNCORE_TYPE && !StartWith(this->evt->name, "armv8_")) { + this->fd = PerfEventOpen(&attr, -1, this->cpu, groupFd, 0); +#endif } else { this->fd = PerfEventOpen(&attr, this->pid, this->cpu, groupFd, 0); } diff --git a/pmu/pfm/core.cpp b/pmu/pfm/core.cpp index 32cd3a9..c177fb3 100644 --- a/pmu/pfm/core.cpp +++ b/pmu/pfm/core.cpp @@ -395,6 +395,24 @@ namespace HW_CACHE_EVENT { } }; + PMU_PAIR DTLB_STORE_MISSES = { + KUNPENG_PMU::COMMON::DTLB_STORE_MISSES, + { + PERF_TYPE_HW_CACHE, + 0x10103, + KUNPENG_PMU::COMMON::DTLB_STORE_MISSES + } + }; + + PMU_PAIR DTLB_STORES = { + KUNPENG_PMU::COMMON::DTLB_STORES, + { + PERF_TYPE_HW_CACHE, + 0x103, + KUNPENG_PMU::COMMON::DTLB_STORES + } + }; + PMU_PAIR ITLB_LOAD_MISSES = { KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES, { @@ -994,6 +1012,10 @@ const std::unordered_map HIP_X86_CORE_PMU_ HW_CACHE_EVENT::BRANCH_LOADS, HW_CACHE_EVENT::DTLB_LOAD_MISSES, HW_CACHE_EVENT::DTLB_LOADS, + HW_CACHE_EVENT::DTLB_STORE_MISSES, + HW_CACHE_EVENT::DTLB_STORES, + HW_CACHE_EVENT::ITLB_LOADS, + HW_CACHE_EVENT::ITLB_LOAD_MISSES, HW_CACHE_EVENT::NODE_LOAD_MISSES, HW_CACHE_EVENT::NODE_LOADS, HW_CACHE_EVENT::NODE_STORE_MISSES, diff --git a/pmu/pfm/pfm_name.cpp b/pmu/pfm/pfm_name.cpp index 7baa0f6..8686ecd 100644 --- a/pmu/pfm/pfm_name.cpp +++ b/pmu/pfm/pfm_name.cpp @@ -49,6 +49,8 @@ const char* KUNPENG_PMU::COMMON::BRANCH_LOAD_MISSES = "branch-load-misses"; const char* 
KUNPENG_PMU::COMMON::BRANCH_LOADS = "branch-loads"; const char* KUNPENG_PMU::COMMON::DTLB_LOAD_MISSES = "dTLB-load-misses"; const char* KUNPENG_PMU::COMMON::DTLB_LOADS = "dTLB-loads"; +const char* KUNPENG_PMU::COMMON::DTLB_STORE_MISSES = "dTLB-store-misses"; +const char* KUNPENG_PMU::COMMON::DTLB_STORES = "dTLB-stores"; const char* KUNPENG_PMU::COMMON::ITLB_LOAD_MISSES = "iTLB-load-misses"; const char* KUNPENG_PMU::COMMON::ITLB_LOADS = "iTLB-loads"; const char* KUNPENG_PMU::COMMON::NODE_LOAD_MISSES = "node-load-misses"; diff --git a/pmu/pfm/pfm_name.h b/pmu/pfm/pfm_name.h index c5e5905..1563590 100644 --- a/pmu/pfm/pfm_name.h +++ b/pmu/pfm/pfm_name.h @@ -49,6 +49,8 @@ extern const char* BRANCH_LOAD_MISSES; extern const char* BRANCH_LOADS; extern const char* DTLB_LOAD_MISSES; extern const char* DTLB_LOADS; +extern const char* DTLB_STORE_MISSES; +extern const char* DTLB_STORES; extern const char* ITLB_LOAD_MISSES; extern const char* ITLB_LOADS; extern const char* NODE_LOAD_MISSES; diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index 3a3bc0d..ab83ffd 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -38,7 +38,7 @@ static const string EVENT_DIR = "/events/"; static std::mutex pmuEventListMtx; #ifdef IS_X86 -static vector supportDevPrefixs = {"uncore_iio", "uncore_imc"}; +static vector supportDevPrefixs = {"uncore_iio", "uncore_imc", "cpu"}; #else static vector supportDevPrefixs = {"hisi", "smmuv3", "hns3", "armv8"}; #endif diff --git a/util/common.cpp b/util/common.cpp index 647119a..67c4a66 100644 --- a/util/common.cpp +++ b/util/common.cpp @@ -155,4 +155,11 @@ std::string GetTraceEventDir() return TRACE_DEBUG_EVENT_PATH; } return ""; +} + +bool StartWith(const std::string& str, const std::string& prefix) { + if (str.size() < prefix.size()) { + return false; + } + return str.substr(0, prefix.size()) == prefix; } \ No newline at end of file diff --git a/util/common.h b/util/common.h index f634e4e..1c3bcb7 100644 --- a/util/common.h +++ 
b/util/common.h @@ -41,5 +41,6 @@ std::vector SplitStringByDelimiter(const std::string& str, char del int RaiseNumFd(uint64_t numFd); bool ExistPath(const std::string& filePath); std::string GetTraceEventDir(); +bool StartWith(const std::string& str, const std::string& prefix); #endif // LIBKPROF_COMMON_H -- Gitee From e61bfbfcd14ff03c3152c86f0bd07ab04f2b33ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=96=E9=BE=99?= Date: Mon, 21 Apr 2025 20:31:22 +0800 Subject: [PATCH 03/35] fix blockedSample bugs --- example/pmu_hotspot.cpp | 5 ++++- example/pmu_hotspot.go | 5 ++++- example/pmu_hotspot.py | 5 ++++- pmu/pmu_list.cpp | 19 ++++++++----------- pmu/pmu_metric.h | 2 +- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/example/pmu_hotspot.cpp b/example/pmu_hotspot.cpp index 9487368..c9225f1 100644 --- a/example/pmu_hotspot.cpp +++ b/example/pmu_hotspot.cpp @@ -111,7 +111,7 @@ int GetPmuDataHotspot(PmuData* pmuData, int pmuDataLen, std::vector& tm std::string GetPeriodPercent(uint64_t period) { std::ostringstream oss; - oss << std::fixed << std::setprecision(FLOAT_PRECISION) << (static_cast(period) * 100 / g_totalPeriod); + oss << std::fixed << std::setprecision(FLOAT_PRECISION) << (static_cast(period) / g_totalPeriod * 100); return oss.str(); } @@ -209,6 +209,7 @@ void BlockedSample(int pid, double interval, int count, bool blockedSample) std::cerr << "error msg:" << Perror() << std::endl; return; } + g_totalPeriod = 0; std::vector hotSpotData; GetPmuDataHotspot(pmuData, len, hotSpotData); PrintHotSpotGraph(hotSpotData); @@ -219,6 +220,7 @@ void BlockedSample(int pid, double interval, int count, bool blockedSample) for (int i = 0; i < hotSpotData.size(); ++i) { PrintStack(hotSpotData[i].stack, 0, hotSpotData[i].period); } + g_totalPeriod = 0; PmuDataFree(pmuData); } PmuDisable(pd); @@ -253,6 +255,7 @@ void print_usage() { std::cerr << " process name: process path or input process number\n"; std::cerr << " example: pmu_hotspot 0.1 10 0 ./process\n"; 
std::cerr << " example: pmu_hotspot 1 100 1 ./process\n"; + std::cerr << " example: pmu_hotspot 1 100 1 \n"; } int main(int argc, char** argv) diff --git a/example/pmu_hotspot.go b/example/pmu_hotspot.go index fd8cc73..e825bfd 100644 --- a/example/pmu_hotspot.go +++ b/example/pmu_hotspot.go @@ -35,6 +35,7 @@ func printUsage() { fmt.Println(" process name: process path or input process number") fmt.Println(" example: ./pmu_hotspot_of_go 0.1 10 0 ./process") fmt.Println(" example: ./pmu_hotspot_of_go 1 100 1 ./process") + fmt.Println(" example: ./pmu_hotspot_of_go 1 100 1 ") } var GlobalPeriod uint64 = 0 @@ -118,7 +119,7 @@ func GetPmuDataHotSpot(vo kperf.PmuDataVo) []kperf.PmuData { } func getPeriodPercent(period uint64) float64 { - return float64(period) * 100.00 / float64(GlobalPeriod) + return float64(period) / float64(GlobalPeriod) * 100.00 } func printHotSpotGraph(hotspotData []kperf.PmuData) { @@ -208,6 +209,7 @@ func blockSample(pid int, interval float64, count int, blockedSample int) { return } + GlobalPeriod := 0 hotspotData := GetPmuDataHotSpot(pmuDataVo) printHotSpotGraph(hotspotData) fmt.Printf(strings.Repeat("=", 50) + "Print the call stack of the hotspot function" + strings.Repeat("=", 50) + "\n") @@ -215,6 +217,7 @@ func blockSample(pid int, interval float64, count int, blockedSample int) { for _, data := range hotspotData { printStack(data.Symbols, data.Period) } + GlobalPeriod := 0 } kperf.PmuDisable(fd) kperf.PmuClose(fd) diff --git a/example/pmu_hotspot.py b/example/pmu_hotspot.py index fc76c3a..eea4b58 100644 --- a/example/pmu_hotspot.py +++ b/example/pmu_hotspot.py @@ -79,7 +79,7 @@ def get_pmu_data_hotspot(pmu_data, tmp_data): def get_period_percent(period): - return f"{(period * 100 / g_total_period):.{FLOAT_PRECISION}f}" + return f"{(period / g_total_period * 100.0):.{FLOAT_PRECISION}f}" def print_stack(stack, depth=0, period=0): @@ -157,6 +157,7 @@ def blocked_sample(pid, interval, count, blockedSample): if pmu_data == -1: print(f"read 
failed, error msg: {kperf.error()}") return + g_total_period = 0 hotspot_data = [] get_pmu_data_hotspot(pmu_data, hotspot_data) print_hotspot_graph(hotspot_data) @@ -164,6 +165,7 @@ def blocked_sample(pid, interval, count, blockedSample): print(f"{'@symbol':<40}{'@module':<40}{'@percent':>40}") for data in hotspot_data: print_stack(data.stack, 0, data.period) + g_total_period = 0 err = kperf.disable(pd) if err != 0: print(f"disable failed, error msg: {kperf.error()}") @@ -192,6 +194,7 @@ def print_usage(): print(" process name: process path or input process number") print(" example: python3 pmu_hotspot.py 0.1 10 0 ./process") print(" example: python3 pmu_hotspot.py 1 100 1 ./process") + print(" example: python3 pmu_hotspot.py 1 100 1 ") def main(): pid = 0 diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 09a32f0..9cb1109 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -296,7 +296,7 @@ namespace KUNPENG_PMU { if (item.swOut) { outTime = item.ts; prevTid = item.tid; - DBG_PRINT("Switch out: tid=%d, ts=%ld\n", item.tid, item.ts); + DBG_PRINT("Switch out: tid=%d, ts=%llu\n", item.tid, item.ts); } else { // if the first event is sched_in, we need to ignore it. if (prevTid == -1) { @@ -305,7 +305,7 @@ namespace KUNPENG_PMU { } if (prevTid == item.tid && outTime > 0) { tidToOffTimeStamps[item.tid].emplace_back(item.ts - outTime); - DBG_PRINT("Switch in: tid=%d, ts=%ld, offTime=%ld\n", item.tid, item.ts, item.ts - outTime); + DBG_PRINT("Switch in: tid=%d, ts=%llu, offTime=%llu\n", item.tid, item.ts, item.ts - outTime); outTime = 0; } } @@ -334,24 +334,21 @@ namespace KUNPENG_PMU { if (strcmp(item.evt, "context-switches") == 0) { // Before the context-switches event, there is only one cycles event, which we need to ignore. if (currentTs == 0) { + currentTs = item.ts; DBG_PRINT("Ignoring first cycles event for tid=%d\n", item.tid); continue; } - // only the on cpu event is cycles or cpu-clock, this compute is right. 
+ // only the on cpu event is cycles, this compute is right. if (csCnt < tidToOffTimeStamps[item.tid].size()) { item.period = tidToOffTimeStamps[item.tid][csCnt] * curPeriod / (currentTs - prevTs); - DBG_PRINT("Context switch: tid=%d, period=%ld\n", item.tid, item.period); + DBG_PRINT("Context switch: ts=%llu, tid=%d, period=%llu\n", item.ts, item.tid, item.period); csCnt++; } } else { // on cpu event data update. - if (prevTs == 0) { - prevTs = item.ts; - } else { - prevTs = currentTs; - currentTs = item.ts; - curPeriod = item.period; - } + prevTs = currentTs; + currentTs = item.ts; + curPeriod = item.period; } } } diff --git a/pmu/pmu_metric.h b/pmu/pmu_metric.h index 593f97d..eae8524 100644 --- a/pmu/pmu_metric.h +++ b/pmu/pmu_metric.h @@ -9,7 +9,7 @@ * PURPOSE. * See the Mulan PSL v2 for more details. * Author: Mr.Lei - * Create: 2024-04-17 + * Create: 2025-04-17 * Description: definition of pmu metric some interface for free ******************************************************************************/ -- Gitee From 7aa5d0c53d549feb52cef131062fe883f56e57fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=96=E9=BE=99?= Date: Wed, 23 Apr 2025 17:51:29 +0800 Subject: [PATCH 04/35] fix 920F cpu not support info --- include/pcerrc.h | 1 + pmu/pmu_metric.cpp | 10 +++++++++- python/modules/kperf/perror.py | 1 + test/test_perf/test_metric.cpp | 1 + 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/pcerrc.h b/include/pcerrc.h index 3a71491..465e5e4 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -107,6 +107,7 @@ extern "C" { #define LIBPERF_ERR_CLUSTER_ID_OVERSIZE 1063 #define LIBPERF_ERR_INVALID_PMU_BDF_TYPE 1064 #define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 +#define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 #define UNKNOWN_ERROR 9999 diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index d645917..eedf8dd 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -298,7 +298,11 @@ namespace KUNPENG_PMU { static const map 
GetDeviceMtricConfig() { - return UNCORE_METRIC_CONFIG_MAP.at(GetCpuType()); + CHIP_TYPE chipType = GetCpuType(); + if (UNCORE_METRIC_CONFIG_MAP.find(chipType) == UNCORE_METRIC_CONFIG_MAP.end()) { + return {}; + } + return UNCORE_METRIC_CONFIG_MAP.at(chipType); } static int QueryUncoreRawDevices() @@ -757,6 +761,10 @@ namespace KUNPENG_PMU { static int CheckDeviceMetricEnum(PmuDeviceMetric metric) { const auto& metricConfig = GetDeviceMtricConfig(); + if (metricConfig.empty()) { + New(LIBPERF_ERR_NOT_SUPPORT_METRIC, "The current platform cpu does not support uncore metric collection."); + return LIBPERF_ERR_NOT_SUPPORT_METRIC; + } if (metricConfig.find(metric) == metricConfig.end()) { New(LIBPERF_ERR_INVALID_PMU_DEVICES_METRIC, "For this platform this metric " + GetMetricString(metric) + " is invalid value for PmuDeviceMetric!"); diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index f8dcb9c..3f897d0 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -106,6 +106,7 @@ class Error: LIBPERF_ERR_CLUSTER_ID_OVERSIZE = 1063 LIBPERF_ERR_INVALID_PMU_BDF_TYPE = 1064 LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 + LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 UNKNOWN_ERROR = 9999 diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index 85b8653..56bee10 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -107,6 +107,7 @@ TEST_F(TestMetric, CollectDDRBandwidth) PmuDeviceAttr devAttr = {}; devAttr.metric = PMU_DDR_READ_BW; int pd = PmuDeviceOpen(&devAttr, 1); + cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); sleep(1); -- Gitee From 0fed4980e1bd9f5d73e785fabd33bc880fb4c622 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 25 Apr 2025 10:07:24 +0800 Subject: [PATCH 05/35] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dpython=20ksym.get=5Fsta?= =?UTF-8?q?ck=E5=92=8Cget=5Fsymbol=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- python/modules/_libkperf/Symbol.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/modules/_libkperf/Symbol.py b/python/modules/_libkperf/Symbol.py index 7b3242e..705f1a3 100644 --- a/python/modules/_libkperf/Symbol.py +++ b/python/modules/_libkperf/Symbol.py @@ -645,7 +645,7 @@ def SymResolverRecordModuleNoDwarf(pid: int) -> None: c_SymResolverRecordModuleNoDwarf(c_pid) -def StackToHash(pid: int, stackList: List[int]) -> Iterator[Stack]: +def StackToHash(pid: int, stackList: List[int]) -> Stack: """ struct Stack* StackToHash(int pid, unsigned long* stack, int nr); """ @@ -659,10 +659,9 @@ def StackToHash(pid: int, stackList: List[int]) -> Iterator[Stack]: c_nr = ctypes.c_int(stack_len) c_stack = c_StackToHash(c_pid, c_stack_list, c_nr) - while c_stack: - stack = Stack.from_c_stack(c_stack) - yield stack - c_stack = c_stack.contents.next + if not c_stack: + return None + return Stack.from_c_stack(c_stack.contents) def SymResolverMapAddr(pid: int, addr: int) -> Symbol: @@ -677,8 +676,9 @@ def SymResolverMapAddr(pid: int, addr: int) -> Symbol: c_addr = ctypes.c_ulong(addr) c_sym = c_SymResolverMapAddr(c_pid, c_addr) - - return Symbol.from_c_sym(c_sym) + if not c_sym: + return None + return Symbol.from_c_sym(c_sym.contents) def FreeModuleData(pid: int) -> None: -- Gitee From 6c127d66145a2a7a9dec4575d5e3a96ab140e6cb Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Sun, 27 Apr 2025 14:11:32 +0800 Subject: [PATCH 06/35] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=B5=84=E6=96=99?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 10 ++++------ README.md | 10 +++++----- docs/Details_Usage.md | 2 +- docs/Go_API.md | 10 +++++----- go/src/libkperf_test/libkperf_test.go | 2 +- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/README.en.md b/README.en.md index 503138f..ed63554 100644 --- 
a/README.en.md +++ b/README.en.md @@ -124,7 +124,7 @@ All pmu functions are accomplished by the following interfaces: Here are some examples: -* Get pmu count for a process. +* Get pmu count for a process ```C++ int pidList[1]; @@ -205,8 +205,7 @@ PmuDataFree(data); PmuClose(pd); ``` -Python examples: - +* Python examples ```python import time from collections import defaultdict @@ -237,8 +236,7 @@ def Counting(): kperf.close(pd) ``` -Go example - +* Go example ```go import "libkperf/kperf" import "fmt" @@ -290,7 +288,7 @@ python example.py ``` * **For Go example Code:** - You can directly go to the go/src/libkperf/libkperf_test directory. + You can directly go to the go/src/libkperf_test directory. ```shell go test -v # run all diff --git a/README.md b/README.md index ee59dc4..98f7465 100644 --- a/README.md +++ b/README.md @@ -105,8 +105,7 @@ Go API文档可以参考GO_API.md: 以下是一些示例: -- 获取进程的pmu计数。 - +- 获取进程的pmu计数 ```C++ int pidList[1]; pidList[0] = pid; @@ -181,7 +180,7 @@ PmuDataFree(data); PmuClose(pd); ``` -Python 例子: +- Python 例子 ```python import time from collections import defaultdict @@ -213,7 +212,8 @@ def Counting(): kperf.close(pd) ``` -Go 例子 + +- Go 例子 ```go import "libkperf/kperf" import "fmt" @@ -264,7 +264,7 @@ python example.py ``` * **针对Go示例代码:** -可以直接跳转到 go/src/libkperf/libkperf_test目录下 +可以直接跳转到 go/src/libkperf_test目录下 ```shell go test -v # 全部运行 go test -v -test.run TestCount #指定运行的用例 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 815f4d8..5c9f6e8 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -297,7 +297,7 @@ c --> d(......) 
Symbol的字段信息受PmuAttr影响: - PmuAttr.callStack会决定Stack是完整的调用栈,还是只有一层调用栈(即Stack链表只有一个元素)。 - PmuAttr.symbolMode如果等于NO_SYMBOL_RESOLVE,那么PmuData的stack是空指针。 -- PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息。 +- PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息(注:kernel的fileName为'[kernel]')。 - PmuAttr.symbolMode如果等于RESOLVE_ELF_DWARF,那么Symbol的所有信息都有效。 ### 采集uncore事件 diff --git a/docs/Go_API.md b/docs/Go_API.md index fa1ba8f..de71c2f 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -340,7 +340,7 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns @@ -395,7 +395,7 @@ import "libkperf/kperf" import "fmt" func main() { - clusterId := uint(1) + clusterId := uint(1) coreList, err := kperf.PmuGetClusterCore(clusterId) if err != nil { fmt.Printf("kperf PmuGetClusterCore failed, expect err is nil, but is %v\n", err) @@ -419,7 +419,7 @@ import "libkperf/kperf" import "fmt" func main() { - nodeId := uint(0) + nodeId := uint(0) coreList, err := kperf.PmuGetNumaCore(nodeId) if err != nil { fmt.Printf("kperf PmuGetNumaCore failed, expect err is nil, but is %v\n", err) @@ -436,14 +436,14 @@ func main() { func PmuGetCpuFreq(core uint) (int64, error) 查询当前系统指定core的实时CPU频率 * core cpu coreId -* 返回值为int64, 时当前cpu core的实时频率,出现错误频率为-1,且error不为空 +* 返回值为int64, 为当前cpu core的实时频率,出现错误频率为-1,且error不为空 ```go import "libkperf/kperf" import "fmt" func main() { - coreId := uint(0) + coreId := uint(0) freq, err := kperf.PmuGetCpuFreq(coreId) if err != nil { fmt.Printf("kperf PmuGetCpuFreq failed, expect err is nil, but is %v\n", err) diff --git 
a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index 2f55951..e64ea6a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -22,7 +22,7 @@ func TestCount(t *testing.T) { } for _, o := range dataVo.GoData { - t.Logf("================================Get Couting data success================================") + t.Logf("================================Get Counting data success================================") t.Logf("count base info comm=%v, evt=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, sockedId=%v", o.Comm, o.Evt, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) t.Logf("count info count=%v, countPercent=%v", o.Count, o.CountPercent) } -- Gitee From 30cbfcc302c38fec4e4e16b92b83b44f27b3c635 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 28 Apr 2025 22:08:07 +0800 Subject: [PATCH 07/35] test pr file --- docs/test_pr.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/test_pr.md diff --git a/docs/test_pr.md b/docs/test_pr.md new file mode 100644 index 0000000..e69de29 -- Gitee From 359a999cb321d0f89e0fe93de6e707bfbeffab62 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Tue, 29 Apr 2025 10:28:48 +0800 Subject: [PATCH 08/35] =?UTF-8?q?=E9=87=8D=E7=BD=AElibsym=E5=BC=82?= =?UTF-8?q?=E5=B8=B8=E4=B8=BAwarning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/spe.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pmu/spe.cpp b/pmu/spe.cpp index a075bb8..0d1c7d9 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -362,9 +362,19 @@ void Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData if (header->type == PERF_RECORD_MMAP && symbolMode != NO_SYMBOL_RESOLVE) { struct PerfRecordMmap *sample = (struct PerfRecordMmap *)header; if (symbolMode == RESOLVE_ELF_DWARF) { - SymResolverUpdateModule(sample->tid, sample->filename, 
sample->addr); + int ret = SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); + if (ret != SUCCESS) { + // if the module fails to be updated, a warning is recorded to overwrite the failure error code. + SetWarn(ret, Perror()); + New(SUCCESS); + } } else if (symbolMode == RESOLVE_ELF) { - SymResolverUpdateModuleNoDwarf(sample->tid, sample->filename, sample->addr); + int ret = SymResolverUpdateModuleNoDwarf(sample->tid, sample->filename, sample->addr); + if (ret != SUCCESS) { + // if the module fails to be updated, a warning is recorded to overwrite the failure error code. + SetWarn(ret, Perror()); + New(SUCCESS); + } } dataTail += header->size; continue; -- Gitee From 924b361204d3b28a51757455576f695964bc1f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=96=E9=BE=99?= Date: Tue, 29 Apr 2025 15:09:37 +0800 Subject: [PATCH 09/35] =?UTF-8?q?fix=20blocked=20sample=20bugs=20and=20blo?= =?UTF-8?q?cked=20sample=E3=80=81ddrc=20case?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- example/case/blocked_sample_case.cpp | 465 +++++++++++++++++++++++++++ example/case/ddrc_case.cpp | 31 ++ example/pmu_hotspot.cpp | 13 +- example/pmu_hotspot.go | 8 +- example/pmu_hotspot.py | 10 +- python/modules/_libkperf/Pmu.py | 6 +- 6 files changed, 522 insertions(+), 11 deletions(-) create mode 100644 example/case/blocked_sample_case.cpp create mode 100644 example/case/ddrc_case.cpp diff --git a/example/case/blocked_sample_case.cpp b/example/case/blocked_sample_case.cpp new file mode 100644 index 0000000..4db89e7 --- /dev/null +++ b/example/case/blocked_sample_case.cpp @@ -0,0 +1,465 @@ +/* + * Optimization Notes: + * + * This program constructs a multi-threaded task, where each task consists of three phases: + * + * 1. on-CPU computation: + * Two modes are provided: + * - inefficient: Simulates inefficient computation using heavy floating-point operations (default). 
+ * - efficient: Uses integers instead of floating-point numbers for optimized computation + * (though more efficient, overall time remains almost unchanged as off-CPU phase (synchronous IO) is the bottleneck). + * + * 2. IO operation phase: + * Three modes are provided: + * - global: Write to a single file protected by a global lock (baseline). + * - split: Each thread writes to its own file (reduces lock contention). + * - async: Asynchronous IO, enqueues data for background batch writing (previous version lacked batching, causing worse performance). + * + * 3. Supplemental on-CPU computation. + * + * Usage (command-line argument order): + * [numThreads] [tasksPerThread] [cpuIterations] [ioDataSize] [ioWrites] [ioMode] [onCpuMode] + * + * Example (your given test parameters, plus onCpuMode parameter): + * ./blocked_sample_io 4 50 100000 5000 3000 global inefficient + * + * Where: + * ioMode: global|split|async + * onCpuMode: inefficient (inefficient implementation) or efficient (optimized implementation) + * + * Note: If the user attempts to optimize the CPU computation part using the efficient on-CPU mode, + * the overall runtime remains almost unchanged, proving that the bottleneck lies mainly in the off-CPU part (synchronous IO and lock contention). 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +using namespace std::chrono; + +// Define IO mode enumeration +enum class IOMode { GLOBAL, SPLIT, ASYNC }; +IOMode currentIOMode = IOMode::GLOBAL; // Default IO mode + +//------------------------------------------------------------- +// on-CPU simulation: Implementation of two computation methods +//------------------------------------------------------------- +// Inefficient CPU work: Heavy loop computation to prevent compiler optimization +void doOnCpuWorkInefficient(int iterations) { + volatile double dummy = 1.0; + for (int i = 0; i < iterations; i++) { + dummy = dummy * 1.000001 + 0.000001; + } + (void)dummy; +} + +// Efficient CPU work: Use integers to simulate decimals for optimized computation +void doOnCpuWorkEfficient(int iterations) { + long long dummy = 1000000; // Use integers to simulate decimals, assuming precision of 1e-6 + for (int i = 0; i < iterations; i++) { + dummy = dummy * 1000001 / 1000000 + 1; + } + (void)dummy; +} + +// Global flag to decide which on-CPU computation method to use (default is inefficient) +bool efficientOnCpu = false; + +// Encapsulated on-CPU work interface, calls corresponding implementation based on efficientOnCpu +void doOnCpuWork(int iterations) { + if (efficientOnCpu) { + doOnCpuWorkEfficient(iterations); + } else { + doOnCpuWorkInefficient(iterations); + } +} + +//------------------------------------------------------------- +// GLOBAL mode: Global file and mutex +//------------------------------------------------------------- +mutex globalFileMutex; +ofstream globalSyncFile; // Global file + +//------------------------------------------------------------- +// Asynchronous IO Manager (optimized): Batch writing to reduce flush frequency +//------------------------------------------------------------- +class AsyncIOManager { +private: 
+ queue msgQueue; + mutex mtx; + condition_variable cv; + atomic stop; + thread worker; + ofstream outFile; + const size_t batchSize; // Number of messages written per batch + +public: + AsyncIOManager(const string& filename, size_t batchSize = 50) + : stop(false), batchSize(batchSize) + { + outFile.open(filename, ios::out | ios::trunc); + if (!outFile.is_open()){ + cerr << "Failed to open file: " << filename << endl; + } + worker = thread([this]() { this->process(); }); + } + + ~AsyncIOManager(){ + { + lock_guard lock(mtx); + stop = true; + } + cv.notify_one(); + if(worker.joinable()){ + worker.join(); + } + if(outFile.is_open()){ + outFile.close(); + } + } + + // Push message to be written into the queue + void push(const string &msg) { + { + lock_guard lock(mtx); + msgQueue.push(msg); + } + cv.notify_one(); + } + +private: + // Background thread processes batch writes + void process() { + while (true) { + vector localBatch; + { + unique_lock lock(mtx); + cv.wait(lock, [this]() { return stop || !msgQueue.empty(); }); + while (!msgQueue.empty() && localBatch.size() < batchSize) { + localBatch.push_back(msgQueue.front()); + msgQueue.pop(); + } + if (stop && localBatch.empty()) { + break; + } + } + // Merge and write batch, then flush + if (outFile.is_open()) { + string batchStr; + for (const auto &msg : localBatch) { + batchStr.append(msg); + } + outFile << batchStr; + outFile.flush(); + } + } + } +}; + +AsyncIOManager *asyncIO = nullptr; // Global pointer to asynchronous IO manager + +//------------------------------------------------------------- +// Thread Pool: Manages worker threads and task queue +//------------------------------------------------------------- +class ThreadPool { +public: + ThreadPool(size_t threads); + ~ThreadPool(); + void enqueue(function task); + void wait(); + +private: + vector workers; + queue> tasks; + mutex queue_mutex; + condition_variable condition; + atomic stop; + atomic active_tasks; + condition_variable cv_finished; +}; + 
+ThreadPool::ThreadPool(size_t threads) : stop(false), active_tasks(0) { + for (size_t i = 0; i < threads; i++) { + workers.emplace_back([this, i]() { + while (true) { + function task; + { + unique_lock lock(this->queue_mutex); + this->condition.wait(lock, [this]() { + return this->stop.load() || !this->tasks.empty(); + }); + if (this->stop.load() && this->tasks.empty()) + return; + task = move(this->tasks.front()); + this->tasks.pop(); + active_tasks++; + } + task(); + { + lock_guard lock(this->queue_mutex); + active_tasks--; + if (tasks.empty() && active_tasks == 0) { + cv_finished.notify_all(); + } + } + } + }); + } +} + +ThreadPool::~ThreadPool() { + { + lock_guard lock(queue_mutex); + stop.store(true); + } + condition.notify_all(); + for (thread &worker : workers) { + if (worker.joinable()) + worker.join(); + } +} + +void ThreadPool::enqueue(function task) { + { + lock_guard lock(queue_mutex); + tasks.push(move(task)); + } + condition.notify_one(); +} + +void ThreadPool::wait() { + unique_lock lock(queue_mutex); + cv_finished.wait(lock, [this]() { + return tasks.empty() && active_tasks == 0; + }); +} + +//------------------------------------------------------------- +// Helper functions: Print divider and usage instructions +//------------------------------------------------------------- +void printDivider() { + cout << string(60, '-') << endl; +} + +void printUsage(const char* programName) { + cout << "Usage: " << programName << " [numThreads] [tasksPerThread] [cpuIterations] [ioDataSize] [ioWrites] [ioMode] [onCpuMode]" << endl; + cout << " numThreads: Number of worker threads (default: 4)" << endl; + cout << " tasksPerThread: Number of tasks per thread (default: 50)" << endl; + cout << " cpuIterations: Number of on-CPU computation iterations (default: 100000)" << endl; + cout << " ioDataSize: Number of characters written per synchronous IO operation (default: 5000)" << endl; + cout << " ioWrites: Number of IO operations per task (default: 3000)" << endl; + 
cout << " ioMode: IO mode, options: global, split, async (default: global)" << endl; + cout << " onCpuMode: on-CPU mode, options: inefficient, efficient (default: inefficient)" << endl; +} + +//------------------------------------------------------------- +// GLOBAL mode IO operation: Write to global file with global lock +//------------------------------------------------------------- +void doGlobalIOWork(int taskId, int ioDataSize, int ioWrites) { + stringstream ss; + ss << "Task " << taskId << " data: "; + for (int i = 0; i < ioDataSize; i++) { + ss << "X"; + } + ss << "\n"; + string data = ss.str(); + for (int i = 0; i < ioWrites; i++) { + { + lock_guard lock(globalFileMutex); + if (globalSyncFile.is_open()) { + globalSyncFile << data; + globalSyncFile.flush(); + } + } + doOnCpuWork(1000); + } +} + +//------------------------------------------------------------- +// SPLIT mode IO operation: Each thread writes to its own file +//------------------------------------------------------------- +void doSplitIOWork(int taskId, int ioDataSize, int ioWrites) { + stringstream ss; + ss << "Task " << taskId << " data: "; + for (int i = 0; i < ioDataSize; i++) { + ss << "X"; + } + ss << "\n"; + string data = ss.str(); + static thread_local ofstream localFile; + static thread_local bool initialized = false; + if (!initialized) { + auto tid = this_thread::get_id(); + hash hasher; + size_t id_hash = hasher(tid); + string filename = "split_output_" + to_string(id_hash) + ".txt"; + localFile.open(filename, ios::out | ios::trunc); + if (!localFile.is_open()) { + cerr << "Failed to open file: " << filename << endl; + } + initialized = true; + } + for (int i = 0; i < ioWrites; i++) { + localFile << data; + localFile.flush(); + doOnCpuWork(1000); + } +} + +//------------------------------------------------------------- +// ASYNC mode IO operation: Push data into asynchronous queue +//------------------------------------------------------------- +void doAsyncIOWork(int taskId, int 
ioDataSize, int ioWrites) { + stringstream ss; + ss << "Task " << taskId << " data: "; + for (int i = 0; i < ioDataSize; i++) { + ss << "X"; + } + ss << "\n"; + string data = ss.str(); + for (int i = 0; i < ioWrites; i++) { + if (asyncIO) { + asyncIO->push(data); + } + doOnCpuWork(1000); + } +} + +//------------------------------------------------------------- +// Task processing: on-CPU computation -> IO operation -> small amount of on-CPU computation +//------------------------------------------------------------- +void processTask(int taskId, int cpuIterations, int ioDataSize, int ioWrites) { + // Phase 1: on-CPU computation (choose implementation based on onCpuMode) + doOnCpuWork(cpuIterations); + + // Phase 2: IO operation, choose execution method based on current IO mode + if (currentIOMode == IOMode::GLOBAL) { + doGlobalIOWork(taskId, ioDataSize, ioWrites); + } else if (currentIOMode == IOMode::SPLIT) { + doSplitIOWork(taskId, ioDataSize, ioWrites); + } else if (currentIOMode == IOMode::ASYNC) { + doAsyncIOWork(taskId, ioDataSize, ioWrites); + } + + // Phase 3: Small amount of additional on-CPU computation + doOnCpuWork(cpuIterations / 10); +} + +//------------------------------------------------------------- +// main function: Parse arguments, initialize IO & on-CPU modes, start thread pool, and measure elapsed time +//------------------------------------------------------------- +int main(int argc, char* argv[]) { + // Default parameters + int numThreads = 4; + int tasksPerThread = 50; + int cpuIterations = 100000; + int ioDataSize = 5000; + int ioWrites = 3000; + string ioModeStr = "global"; // Default IO mode + string onCpuModeStr = "inefficient"; // Default on-CPU mode + + // Argument check and help information + if (argc > 1) { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { + printUsage(argv[0]); + return 0; + } + } + if (argc > 1) { numThreads = atoi(argv[1]); } + if (argc > 2) { tasksPerThread = atoi(argv[2]); } + if (argc > 3) 
{ cpuIterations = atoi(argv[3]); } + if (argc > 4) { ioDataSize = atoi(argv[4]); } + if (argc > 5) { ioWrites = atoi(argv[5]); } + if (argc > 6) { ioModeStr = argv[6]; } + if (argc > 7) { onCpuModeStr = argv[7]; } + + // Determine current IO mode based on ioMode parameter + if (ioModeStr == "global") { + currentIOMode = IOMode::GLOBAL; + cout << "Using GLOBAL mode: Writing to global file with global mutex protection" << endl; + } else if (ioModeStr == "split") { + currentIOMode = IOMode::SPLIT; + cout << "Using SPLIT mode: Each thread writes to its own file, reducing lock granularity" << endl; + } else if (ioModeStr == "async") { + currentIOMode = IOMode::ASYNC; + cout << "Using ASYNC mode: Asynchronous IO, background thread performs batch writes" << endl; + } else { + cout << "Unknown IO mode, defaulting to GLOBAL mode" << endl; + currentIOMode = IOMode::GLOBAL; + } + + // Determine on-CPU mode based on onCpuMode parameter + if (onCpuModeStr == "efficient") { + efficientOnCpu = true; + cout << "Using efficient on-CPU implementation" << endl; + } else { + efficientOnCpu = false; + cout << "Using inefficient on-CPU implementation (default)" << endl; + } + + int totalTasks = numThreads * tasksPerThread; + printDivider(); + cout << "Program configuration:" << endl; + cout << " Number of worker threads (numThreads): " << numThreads << endl; + cout << " Number of tasks per thread (tasksPerThread): " << tasksPerThread << endl; + cout << " Total number of tasks: " << totalTasks << endl; + cout << " On-CPU computation iterations (cpuIterations): " << cpuIterations << endl; + cout << " Characters written per IO operation (ioDataSize): " << ioDataSize << endl; + cout << " Number of IO operations per task (ioWrites): " << ioWrites << endl; + cout << " IO mode (ioMode): " << ioModeStr << endl; + cout << " on-CPU mode (onCpuMode): " << onCpuModeStr << endl; + printDivider(); + + // Perform necessary initialization based on IO mode + if (currentIOMode == IOMode::GLOBAL) { + 
globalSyncFile.open("global_output.txt", ios::out | ios::trunc); + if (!globalSyncFile.is_open()){ + cerr << "Failed to open global_output.txt file. Please check permissions or path." << endl; + return 1; + } + } else if (currentIOMode == IOMode::ASYNC) { + asyncIO = new AsyncIOManager("async_output.txt", 50); + } + + // Create thread pool, distribute tasks, and measure total elapsed time + ThreadPool pool(numThreads); + auto startTime = high_resolution_clock::now(); + for (int i = 0; i < totalTasks; i++) { + pool.enqueue([=]() { + processTask(i, cpuIterations, ioDataSize, ioWrites); + }); + } + pool.wait(); + auto endTime = high_resolution_clock::now(); + duration elapsed = endTime - startTime; + + // Cleanup resources + if (currentIOMode == IOMode::GLOBAL) { + globalSyncFile.close(); + } else if (currentIOMode == IOMode::ASYNC) { + delete asyncIO; + asyncIO = nullptr; + } + + printDivider(); + cout << "Completed " << totalTasks << " tasks in " + << fixed << setprecision(2) << elapsed.count() << " seconds." << endl; + cout << "Current IO mode: " << ioModeStr << ", on-CPU mode: " << onCpuModeStr << endl; + cout << "Optimization direction: Reducing lock granularity/scattered writes or adopting batch asynchronous IO can effectively alleviate off-CPU bottlenecks;" << endl; + cout << " Even with an efficient on-CPU implementation, there will be no significant impact on overall runtime." 
<< endl; + printDivider(); + + return 0; +} \ No newline at end of file diff --git a/example/case/ddrc_case.cpp b/example/case/ddrc_case.cpp new file mode 100644 index 0000000..9ee4b8f --- /dev/null +++ b/example/case/ddrc_case.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +#define ARRAY_SIZE (1024 * 1024 * 512) // 512MB, ensuring it exceeds L3 cache +#define STRIDE 64 // Memory access stride (simulating cache line access) + +void memory_read_test(std::vector &array) { + volatile int sum = 0; // Prevent compiler optimization + auto start = std::chrono::high_resolution_clock::now(); + + while (true) { // Infinite loop + for (size_t i = 0; i < array.size(); i += STRIDE) { + sum += array[i]; // Memory access operation + } + + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + double readCnt = (array.size() * sizeof(int)) / (elapsed.count() * 1024 * 1024 * 1024); // GB/s + + std::cout << "Data throughput: " << readCnt << " GB/s" << std::endl; + start = end; // Reset timer + } +} + +int main() { + std::vector memory_array(ARRAY_SIZE, 1); // Initialize a large array + memory_read_test(memory_array); + return 0; +} \ No newline at end of file diff --git a/example/pmu_hotspot.cpp b/example/pmu_hotspot.cpp index cdfeee3..c3b6626 100644 --- a/example/pmu_hotspot.cpp +++ b/example/pmu_hotspot.cpp @@ -217,7 +217,11 @@ void BlockedSample(int pid, double interval, int count, bool blockedSample) std::cout << std::string(50, '=') << std::endl; std::cout << std::setw(40) << "@symbol" << std::setw(40) << "@module"; std::cout << std::setw(40) << std::right << "@percent" << std::endl; - for (int i = 0; i < hotSpotData.size(); ++i) { + int stackLen = hotSpotData.size(); + if (stackLen > 10) { + stackLen = 10; // Only print top 10 hotspots stack. 
+ } + for (int i = 0; i < stackLen; ++i) { PrintStack(hotSpotData[i].stack, 0, hotSpotData[i].period); } g_totalPeriod = 0; @@ -268,6 +272,7 @@ int main(int argc, char** argv) int count = 0; bool blockedSample = false; int pid = 0; + bool needKill = false; try { interval = std::stod(argv[1]); if (interval <= 0) { @@ -285,6 +290,7 @@ int main(int argc, char** argv) pid = std::stoi(argv[4]); } catch (const std::invalid_argument&) { StartProc(argv[4], pid); + needKill = true; } } catch (const std::exception& e) { std::cerr << "Error parsing arguments: " << e.what() << "\n"; @@ -292,7 +298,8 @@ int main(int argc, char** argv) return EXIT_FAILURE; } BlockedSample(pid, interval, count, blockedSample); - EndProc(pid); - + if (needKill) { + EndProc(pid); + } return 0; } \ No newline at end of file diff --git a/example/pmu_hotspot.go b/example/pmu_hotspot.go index ef3c1bd..8fc5d59 100644 --- a/example/pmu_hotspot.go +++ b/example/pmu_hotspot.go @@ -215,8 +215,12 @@ func blockSample(pid int, interval float64, count int, blockedSample int) { printHotSpotGraph(hotspotData) fmt.Printf(strings.Repeat("=", 50) + "Print the call stack of the hotspot function" + strings.Repeat("=", 50) + "\n") fmt.Printf("% -40s%-40s%+40s\n", "@symbol", "@module", "@percent") - for _, data := range hotspotData { - printStack(data.Symbols, data.Period) + stackLen := len(hotspotData) + if stackLen > 10 { + stackLen = 10 + } + for i := 0; i < stackLen; i++ { + printStack(hotspotData[i].Symbols, hotspotData[i].Period) } GlobalPeriod = 0 } diff --git a/example/pmu_hotspot.py b/example/pmu_hotspot.py index eea4b58..a1ca675 100644 --- a/example/pmu_hotspot.py +++ b/example/pmu_hotspot.py @@ -163,8 +163,9 @@ def blocked_sample(pid, interval, count, blockedSample): print_hotspot_graph(hotspot_data) print("=" * 50 + "Print the call stack of the hotspot function" + "=" * 50) print(f"{'@symbol':<40}{'@module':<40}{'@percent':>40}") - for data in hotspot_data: - print_stack(data.stack, 0, data.period) + 
stack_len = min(10, len(hotspot_data)) + for i in range(stack_len): + print_stack(hotspot_data[i].stack, 0, hotspot_data[i].period) g_total_period = 0 err = kperf.disable(pd) if err != 0: @@ -214,17 +215,20 @@ def main(): if blockedSample not in (0, 1): raise ValueError("BlockedSample must be 0 or 1.") + need_kill = False try: pid = int(sys.argv[4]) except ValueError: pid = start_proc(sys.argv[4]) + need_kill = True except ValueError as e: print(f"Invalid argument: {e}") print_usage() sys.exit(1) blocked_sample(pid, interval, count, blockedSample) - end_proc(pid) + if need_kill: + end_proc(pid) if __name__ == "__main__": main() \ No newline at end of file diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 51512a0..ae0847c 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1087,7 +1087,7 @@ class CtypesPmuData(ctypes.Structure): ('cpu', ctypes.c_int), ('cpuTopo', ctypes.POINTER(CtypesCpuTopology)), ('comm', ctypes.c_char_p), - ('period', ctypes.c_int), + ('period', ctypes.c_uint64), ('count', ctypes.c_uint64), ('countPercent', ctypes.c_double), ('ext', ctypes.POINTER(CtypesPmuDataExt)), @@ -1119,7 +1119,7 @@ class CtypesPmuData(ctypes.Structure): self.cpu = ctypes.c_int(cpu) self.cpuTopo = cpuTopo self.comm = ctypes.c_char_p(comm.encode(UTF_8)) - self.period = ctypes.c_int(period) + self.period = ctypes.c_uint64(period) self.count = ctypes.c_uint64(count) self.countPercent = ctypes.c_double(countPercent) self.ext = ext @@ -1233,7 +1233,7 @@ class ImplPmuData: @period.setter def period(self, period: int) -> None: - self.c_pmu_data.period = ctypes.c_int(period) + self.c_pmu_data.period = ctypes.c_uint64(period) @property def count(self) -> int: -- Gitee From c355086ecf94c112c18960c0639c0a3197e7d348 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 7 May 2025 15:03:26 +0800 Subject: [PATCH 10/35] =?UTF-8?q?=E7=89=B9=E6=80=A7=E6=96=B0=E5=A2=9E:?= 
=?UTF-8?q?=E5=A2=9E=E5=8A=A0cpu=E4=B8=BB=E9=A2=91=E7=9A=84=E9=87=87?= =?UTF-8?q?=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/src/libkperf/kperf/kperf.go | 55 +++++++++ go/src/libkperf_test/libkperf_test.go | 14 +++ include/pcerrc.h | 1 + include/pmu.h | 26 ++++ pmu/cpu_freq.cpp | 171 ++++++++++++++++++++++++++ pmu/cpu_freq.h | 66 ++++++++++ python/modules/_libkperf/Pmu.py | 136 ++++++++++++++++++++ python/modules/kperf/perror.py | 1 + python/modules/kperf/pmu.py | 16 +++ test/test_perf/test_api.cpp | 17 +++ 10 files changed, 503 insertions(+) create mode 100644 pmu/cpu_freq.cpp create mode 100644 pmu/cpu_freq.h diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 2b5958c..4511baa 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -413,6 +413,13 @@ type PmuDeviceDataVo struct { cDeviceData *C.struct_PmuDeviceData } +type PmuCpuFreqDetail struct { + CpuId int // core id + MinFreq uint64 // minimum frequency of core + MaxFreq uint64 // maximum frequency of core + AvgFreq uint64 // average frequency of core +} + // Initialize the collection target // On success, a task id is returned which is the unique identity for the task // On error, -1 is returned @@ -1059,6 +1066,54 @@ func PmuGetCpuFreq(core uint) (int64, error) { return int64(freq), nil } + +// open cpu core freq sampling +// period unit ms +// return error or nil +func PmuOpenCpuFreqSampling(period uint) (error) { + c_period := C.uint32_t(period) + ret := C.PmuOpenCpuFreqSampling(c_period) + if int(ret) == -1 { + return errors.New(C.GoString(C.Perror())) + } + return nil +} + +// close cpu freq sampling +func PmuCloseCpuFreqSampling() { + C.PmuCloseCpuFreqSampling() +} + +// get the maximum frequency,minimum frequency,and average frequency of each core +// param cpuNum +// return PmuCpuFreqDetail array +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) { + cpuNum := C.uint32_t(0) + 
cpuFreqList := C.PmuReadCpuFreqDetail(&cpuNum) + + if (uint32(cpuNum) == 0) { + return nil + } + + ptr := unsafe.Pointer(cpuFreqList) + slice := reflect.SliceHeader{ + Data: uintptr(ptr), + Len: int(cpuNum), + Cap: int(cpuNum), + } + + cCpuFreqList := *(*[]C.struct_PmuCpuFreqDetail)(unsafe.Pointer(&slice)) + goCpuFreqList := make([]PmuCpuFreqDetail, int(cpuNum)) + + for i, v := range cCpuFreqList { + goCpuFreqList[i].CpuId = int(v.cpuId) + goCpuFreqList[i].MinFreq = uint64(v.minFreq) + goCpuFreqList[i].MaxFreq = uint64(v.maxFreq) + goCpuFreqList[i].AvgFreq = uint64(v.avgFreq) + } + return goCpuFreqList +} + func transferCPmuDataToGoData(cPmuData *C.struct_PmuData, dataLen int, fd int) []PmuData { ptr := unsafe.Pointer(cPmuData) slice := reflect.SliceHeader { diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index e64ea6a..28848fe 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -273,3 +273,17 @@ func TestPmuGetNumaCore(t *testing.T) { t.Logf("coreId has:%v", v) } } + +func TestPmuGetCpuFreqDetail(t *testing.T) { + err := kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + t.Fatalf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for _, v := range freqList { + t.Logf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} diff --git a/include/pcerrc.h b/include/pcerrc.h index 465e5e4..259befa 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -108,6 +108,7 @@ extern "C" { #define LIBPERF_ERR_INVALID_PMU_BDF_TYPE 1064 #define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 #define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 +#define LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD 1067 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index af9bb2a..747259d 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -265,6 +265,13 @@ 
struct PmuTraceData { const char *comm; // process command }; +struct PmuCpuFreqDetail { + int cpuId; // cpu core id + uint64_t minFreq; // minimum frequency of core + uint64_t maxFreq; // maximum frequency of core + uint64_t avgFreq; // average frequency of core +}; + /** * @brief * Initialize the collection target. @@ -618,6 +625,25 @@ const char** PmuSysCallFuncList(unsigned *numFunc); */ int64_t PmuGetCpuFreq(unsigned core); +/** + * @brief get the maximum frequency,minimum frequency,and average frequency of each core + * @param cpuNum + * @return PmuCpuFreqDetail array of pointers + */ +struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); + +/** + * @brief open cpu core freq sampling + * @param time period unit ms + * @return -1 or 0 + */ +int PmuOpenCpuFreqSampling(unsigned period); + +/** + * @brief close cpu freq sampling + */ +void PmuCloseCpuFreqSampling(); + #pragma GCC visibility pop #ifdef __cplusplus } diff --git a/pmu/cpu_freq.cpp b/pmu/cpu_freq.cpp new file mode 100644 index 0000000..2d2b033 --- /dev/null +++ b/pmu/cpu_freq.cpp @@ -0,0 +1,171 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Li + * Create: 2025-05-07 + * Description: sample cpu freq. 
+ ******************************************************************************/ +#include "cpu_freq.h" +#include "pmu.h" +#include "pcerr.h" + +using namespace pcerr; + +CpuFreqManager* CpuFreqManager::instance = nullptr; +std::mutex CpuFreqManager::singleMutex; +std::mutex CpuFreqManager::initMutex; +std::vector CpuFreqManager::freqDetailList; +bool CpuFreqManager::hasInit = false; + +PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum) { + auto& ds = CpuFreqManager::GetCpuFreqDetail(); + *cpuNum = ds.size(); + return ds.data(); +} + +int PmuOpenCpuFreqSampling(unsigned period) { + return CpuFreqManager::GetInstance()->InitCpuFreqSampling(period); +} + +void PmuCloseCpuFreqSampling() { + CpuFreqManager::Clear(); +} + +void CpuFreqManager::Clear() { + std::lock_guard lock(singleMutex); + if (instance == nullptr) { + return; + } + delete instance; + instance = nullptr; +} + +CpuFreqManager* CpuFreqManager::GetInstance() { + if (instance == nullptr) { + std::lock_guard lock(singleMutex); + if(instance == nullptr) { + instance = new CpuFreqManager(); + } + } + return instance; +} + +int CpuFreqManager::CheckCpuFreqIsExist() { + for(int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + int64_t freq = PmuGetCpuFreq(cpuId); + if (freq == -1 ) { + return -1; + } + } + return 0; +} + +int CpuFreqManager::CheckSleepPeriod(unsigned period) { + if (period == 0 || period > 10000) { + New(LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD, "invalid period, the period must be less than 10000ms and greater than 0ms"); + return LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD; + } + return SUCCESS; +} + +int CpuFreqManager::InitCpuFreqSampling(unsigned period) { + this->isEnable = true; + if (hasInit) { + return 0; + } + std::lock_guard lock(initMutex); + + if (CheckSleepPeriod(period) != 0) { + return -1; + } + + if (CheckCpuFreqIsExist() != 0) { + return -1; + } + + this->sleepPeriod = static_cast(period) / 1000; + this->cpuFreqThread = std::thread([this]() { + while (!isEnd) { + if (!isEnable) { + 
continue; + } + std::lock_guard lock(mapMutex); + for (int cpu = 0; cpu < MAX_CPU_NUM; cpu++) { + int64_t freq = PmuGetCpuFreq(cpu); + if (freq == -1) { + continue; + } + if (this->freqListMap.find(cpu) != this->freqListMap.end()) { + this->freqListMap[cpu].push_back(freq); + } else { + std::vector freqList = {freq}; + this->freqListMap.insert({cpu, freqList}); + } + } + sleep(this->sleepPeriod); + } + }); + hasInit = true; + return 0; +} + +void CpuFreqManager::CalFreqDetail() { + isEnable = false; + std::lock_guard lock(mapMutex); + + if(!this->freqListMap.empty()) { + uint64_t maxFreq, minFreq, sumFreq; + for (int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + std::vector freqList; + minFreq = 0; + maxFreq = 0; + sumFreq = 0; + if (this->freqListMap.find(cpuId) != this->freqListMap.end()) { + minFreq = UINT64_MAX; + freqList = freqListMap[cpuId]; + } + for (const auto& curFreq: freqList) { + minFreq = minFreq > curFreq ? curFreq : minFreq; + maxFreq = maxFreq > curFreq ? maxFreq : curFreq; + sumFreq += curFreq; + } + uint64_t avgFreq = sumFreq / freqList.size(); + PmuCpuFreqDetail detail = {.cpuId=cpuId, .minFreq=minFreq, .maxFreq=maxFreq, .avgFreq=avgFreq}; + freqDetailList.push_back(detail); + } + freqListMap.clear(); + } else { + GetCurFreqDetail(); + } + + isEnable = true; +} + +void CpuFreqManager::GetCurFreqDetail() { + for(int cpuId = 0; cpuId < MAX_CPU_NUM; cpuId++) { + uint64_t freq = PmuGetCpuFreq(cpuId); + if (freq == -1) { + freq = 0; + } + PmuCpuFreqDetail detail = {.cpuId=cpuId, .minFreq=freq, .maxFreq=freq, .avgFreq=freq}; + freqDetailList.push_back(detail); + } +} + +std::vector& CpuFreqManager::GetCpuFreqDetail() { + std::lock_guard lock(initMutex); + freqDetailList.clear(); + if (!hasInit) { + CpuFreqManager::GetCurFreqDetail(); + } else { + CpuFreqManager::GetInstance()->CalFreqDetail(); + } + return freqDetailList; +} \ No newline at end of file diff --git a/pmu/cpu_freq.h b/pmu/cpu_freq.h new file mode 100644 index 0000000..e6aaf96 --- 
/dev/null +++ b/pmu/cpu_freq.h @@ -0,0 +1,66 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Author: Mr.Li + * Create: 2025-05-07 + * Description: sample cpu freq. + ******************************************************************************/ +#ifndef LIBKPERF_CPU_FREQ_H +#define LIBKPERF_CPU_FREQ_H + +#include +#include +#include +#include +#include + +#include "cpu_map.h" + +class CpuFreqManager { +public: + CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(0.1) {}; + ~CpuFreqManager() { + std::lock_guard lock(initMutex); + if(!hasInit) { + return; + } + isEnable = false; + isEnd = true; + cpuFreqThread.join(); + hasInit = false; + } + static void Clear(); + static CpuFreqManager* GetInstance(); + static std::vector& GetCpuFreqDetail(); + static void GetCurFreqDetail(); + + int InitCpuFreqSampling(unsigned period); + void CalFreqDetail(); + +private: + static CpuFreqManager* instance; + static std::mutex singleMutex; + static std::mutex initMutex; + static std::vector freqDetailList; + static bool hasInit; + + std::mutex mapMutex; + std::thread cpuFreqThread; + volatile bool isEnable; + volatile bool isEnd; + double sleepPeriod; + std::map> freqListMap; + + int CheckCpuFreqIsExist(); + static int CheckSleepPeriod(unsigned period); +}; + + +#endif //LIBKPERF_CPU_FREQ_H \ No newline at end of file diff --git a/python/modules/_libkperf/Pmu.py 
b/python/modules/_libkperf/Pmu.py index ae0847c..e99eb94 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1857,6 +1857,138 @@ def PmuSysCallFuncListFree() -> None: c_PmuSysCallFuncListFree() +class CtypesPmuCpuFreqDetail(ctypes.Structure): + """ + struct PmuCpuFreqDetail { + int cpuId; // cpu core id + uint64_t minFreq; // minimum frequency of core + uint64_t maxFreq; // maximum frequency of core + uint64_t avgFreq; // average frequency of core + } + """ + _fields_ = [ + ('cpuId', ctypes.c_int), + ('minFreq', ctypes.c_uint64), + ('maxFreq', ctypes.c_uint64), + ('avgFreq', ctypes.c_uint64), + ] + + def __init__(self, + cpuId: int = 0, + minFreq: int = 0, + maxFreq: int = 0, + avgFreq: int = 0, + *args:Any, **kw: Any) -> None: + super().__init__(*args, **kw) + self.cpuId = ctypes.c_int(cpuId) + self.minFreq = ctypes.c_uint64(minFreq) + self.maxFreq = ctypes.c_uint64(maxFreq) + self.avgFreq = ctypes.c_uint64(avgFreq) + + +class ImplPmuCpuFreqDetail: + __slots__ = ['__c_pmu_cpu_freq_detail'] + def __init__(self, + cpuId: int = 0, + minFreq: int = 0, + maxFreq: int = 0, + avgFreq: int = 0, + *args:Any, **kw: Any) -> None: + self.__c_pmu_cpu_freq_detail = CtypesPmuCpuFreqDetail( + cpuId=cpuId, + minFreq=minFreq, + maxFreq=maxFreq, + avgFreq=avgFreq + ) + + @property + def c_pmu_cpu_freq_detail(self) -> CtypesPmuCpuFreqDetail: + return self.__c_pmu_cpu_freq_detail + + @property + def cpuId(self) -> int: + return self.__c_pmu_cpu_freq_detail.cpuId + + @cpuId.setter + def cpuId(self, cpuId: int) -> None: + self.__c_pmu_cpu_freq_detail.cpuId = ctypes.c_int(cpuId) + + @property + def minFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.minFreq + + @minFreq.setter + def minFreq(self, minFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.minFreq = ctypes.c_uint64(minFreq) + + @property + def maxFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.maxFreq + + @maxFreq.setter + def maxFreq(self, maxFreq: int) -> None: + 
self.__c_pmu_cpu_freq_detail.maxFreq = ctypes.c_uint64(maxFreq) + + @property + def avgFreq(self) -> int: + return self.__c_pmu_cpu_freq_detail.avgFreq + + @avgFreq.setter + def avgFreq(self, avgFreq: int) -> None: + self.__c_pmu_cpu_freq_detail.avgFreq = ctypes.c_uint64(avgFreq) + + @classmethod + def from_c_pmu_cpu_freq_detail(cls, c_pmu_cpu_freq_detail: CtypesPmuCpuFreqDetail) -> 'ImplPmuCpuFreqDetail': + freq_detail = cls() + freq_detail.__c_pmu_cpu_freq_detail = c_pmu_cpu_freq_detail + return freq_detail + + +class PmuCpuFreqDetail: + __slots__ = ['__pointer', '__iter', '__len'] + + def __init__(self, pointer: ctypes.POINTER(CtypesPmuCpuFreqDetail) = None, len: int = 0) -> None: + self.__pointer = pointer + self.__len = len + self.__iter = (ImplPmuCpuFreqDetail.from_c_pmu_cpu_freq_detail(self.__pointer[i]) for i in range(self.__len)) + + @property + def len(self) -> int: + return self.__len + + @property + def iter(self) -> Iterator[ImplPmuCpuFreqDetail]: + return self.__iter + + +def PmuReadCpuFreqDetail() -> PmuCpuFreqDetail: + """ + struct PmuCpuFreqDetail* PmuReadCpuFreqDetail(unsigned* cpuNum); + """ + c_PmuGetCpuFreqDetail = kperf_so.PmuReadCpuFreqDetail + c_PmuGetCpuFreqDetail.argtypes = [] + c_PmuGetCpuFreqDetail.restype = ctypes.POINTER(CtypesPmuCpuFreqDetail) + c_cpu_len = ctypes.c_uint(0) + c_freq_detail_pointer = c_PmuGetCpuFreqDetail(ctypes.byref(c_cpu_len)) + + return PmuCpuFreqDetail(c_freq_detail_pointer, c_cpu_len.value) + +def PmuOpenCpuFreqSampling(period: int) -> None: + """ + int PmuOpenCpuFreqSampling(unsigned period); + """ + c_PmuOpenCpuFreqSampling = kperf_so.PmuOpenCpuFreqSampling + + c_period = ctypes.c_uint(period) + return c_PmuOpenCpuFreqSampling(c_period) + +def PmuCloseCpuFreqSampling() -> None: + """ + void PmuCloseCpuFreqSampling(); + """ + c_PmuCloseCpuFreqSampling = kperf_so.PmuCloseCpuFreqSampling + c_PmuCloseCpuFreqSampling() + __all__ = [ 'CtypesEvtAttr', @@ -1903,4 +2035,8 @@ __all__ = [ 'PmuTraceDataFree', 
'PmuSysCallFuncList', 'PmuSysCallFuncListFree', + 'PmuOpenCpuFreqSampling', + 'PmuReadCpuFreqDetail', + 'PmuCloseCpuFreqSampling', + 'PmuCpuFreqDetail', ] diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 3f897d0..4ba96b2 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -107,6 +107,7 @@ class Error: LIBPERF_ERR_INVALID_PMU_BDF_TYPE = 1064 LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 + LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD = 1067 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 975b122..90aeaf9 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -526,6 +526,18 @@ def sys_call_func_list() -> Iterator[str]: """ return _libkperf.PmuSysCallFuncList() +class CpuFreqDetail(_libkperf.PmuCpuFreqDetail): + pass + +def open_cpu_freq_sampling(period: int) -> None: + return _libkperf.PmuOpenCpuFreqSampling(period) + +def close_cpu_freq_sampling() -> None: + return _libkperf.PmuCloseCpuFreqSampling() + +def read_cpu_freq_detail() -> CpuFreqDetail: + return _libkperf.PmuReadCpuFreqDetail() + __all__ = [ 'PmuTaskType', 'PmuEventType', @@ -572,4 +584,8 @@ __all__ = [ 'trace_close', 'sys_call_func_list', 'BranchSampleFilter', + 'CpuFreqDetail', + 'open_cpu_freq_sampling', + 'close_cpu_freq_sampling', + 'read_cpu_freq_detail', ] diff --git a/test/test_perf/test_api.cpp b/test/test_perf/test_api.cpp index 6ae2eea..edb4311 100644 --- a/test/test_perf/test_api.cpp +++ b/test/test_perf/test_api.cpp @@ -17,6 +17,7 @@ #include "util_time.h" #include "process_map.h" #include "common.h" +#include "cpu_map.h" #include "test_common.h" using namespace std; @@ -670,3 +671,19 @@ TEST_F(TestAPI, TestBrBeBadMode) { ASSERT_EQ(pd, -1); ASSERT_EQ(Perrorno(), LIBPERF_ERR_BRANCH_JUST_SUPPORT_SAMPLING); } + +TEST_F(TestAPI, TestCpuFreqSampling) { + int ret = PmuOpenCpuFreqSampling(100); + ASSERT_NE(ret, -1); + 
PmuCloseCpuFreqSampling(); + + unsigned cpuNum = 0; + PmuCpuFreqDetail* pDetail1 = PmuReadCpuFreqDetail(&cpuNum); + ASSERT_EQ(cpuNum, MAX_CPU_NUM); + ret = PmuOpenCpuFreqSampling(100); + ASSERT_NE(ret, -1); + sleep(2); + PmuCpuFreqDetail* pDetail2 = PmuReadCpuFreqDetail(&cpuNum); + ASSERT_EQ(cpuNum, MAX_CPU_NUM); + PmuCloseCpuFreqSampling(); +} \ No newline at end of file -- Gitee From 489bc7784781037f5891e45ebec31aff01006bc5 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 14 May 2025 11:44:54 +0800 Subject: [PATCH 11/35] support python3.6 && python whl format --- build.sh | 8 ++++++-- python/CMakeLists.txt | 4 ++-- python/modules/CMakeLists.txt | 7 ++++++- python/modules/setup.py.in | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index 77bf34f..0e0ef3b 100644 --- a/build.sh +++ b/build.sh @@ -35,6 +35,7 @@ creat_dir "${BUILD_DIR}" export CC=gcc export CXX=g++ PYTHON_EXE="" +PYTHON_WHL=false if [ -d "${THIRD_PARTY}/local" ];then echo ${THIRD_PARTY}/local "is exist" else @@ -56,6 +57,9 @@ for arg in "$@"; do build_type=*) BUILD_TYPE="${arg#*=}" ;; + whl=*) + WHL="${arg#*=}" + ;; python_exe=*) PYTHON_EXE="${arg#*=}" ;; @@ -100,9 +104,9 @@ build_libkperf() cd $BUILD_DIR # Remove the PYTHON_KPERF warning if [ -z ${PYTHON_EXE} ];then - cmake -DINCLUDE_TEST=${INCLUDE_TEST} -DPYTHON=${PYTHON} -DGO=${GO} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} .. + cmake -DINCLUDE_TEST=${INCLUDE_TEST} -DPYTHON=${PYTHON} -DPYTHON_WHL=${WHL} -DGO=${GO} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} .. else - cmake -DINCLUDE_TEST=${INCLUDE_TEST} -DPYTHON=${PYTHON} -DGO=${GO} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DPYTHON_KPERF=${PYTHON_EXE} .. + cmake -DINCLUDE_TEST=${INCLUDE_TEST} -DPYTHON=${PYTHON} -DPYTHON_WHL=${WHL} -DGO=${GO} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DPYTHON_KPERF=${PYTHON_EXE} .. 
fi make -j ${cpu_core_num} make install diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 12a7af4..b35bb89 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -15,8 +15,8 @@ project(python_libkperf) if(DEFINED PYTHON_KPERF AND NOT PYTHON_KPERF STREQUAL "") set(PYTHON_EXECUTABLE ${PYTHON_KPERF}) else() - find_package(PythonInterp 3.7 REQUIRED) - find_package(PythonLibs 3.7 REQUIRED) + find_package(PythonInterp 3.6 REQUIRED) + find_package(PythonLibs 3.6 REQUIRED) endif() message("PYTHON_EXECUTABLE is ${PYTHON_EXECUTABLE}") add_subdirectory(modules) \ No newline at end of file diff --git a/python/modules/CMakeLists.txt b/python/modules/CMakeLists.txt index 40b53a1..43c0f9d 100644 --- a/python/modules/CMakeLists.txt +++ b/python/modules/CMakeLists.txt @@ -20,9 +20,14 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_SOURCE_DIR}/setup.py ) +if(DEFINED PYTHON_WHL AND PYTHON_WHL) + set(BIN_TYPE "bdist_wheel") +else() + set(BIN_TYPE "install") +endif() add_custom_target(${PROJECT_NAME} ALL - COMMAND ${PYTHON_EXECUTABLE} setup.py install + COMMAND ${PYTHON_EXECUTABLE} setup.py ${BIN_TYPE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/python/modules/setup.py.in b/python/modules/setup.py.in index 89204b9..5e10f04 100644 --- a/python/modules/setup.py.in +++ b/python/modules/setup.py.in @@ -21,6 +21,6 @@ setup( name='libkperf', version='1.0', packages=find_packages(), - data_files=[('_libkperf', [libkperf_path, libsym_path])] + data_files=[('/_libkperf', [libkperf_path, libsym_path])] ) -- Gitee From f2ece47817af9d7fefef09668989b7ef680be45a Mon Sep 17 00:00:00 2001 From: wangtingwang Date: Thu, 15 May 2025 10:23:15 +0800 Subject: [PATCH 12/35] remove cmake PYTHON_WHL warning --- build.sh | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 0e0ef3b..b4bb419 100644 --- a/build.sh +++ b/build.sh @@ -102,12 +102,22 @@ function build_elfin() { 
build_libkperf() { cd $BUILD_DIR - # Remove the PYTHON_KPERF warning - if [ -z ${PYTHON_EXE} ];then - cmake -DINCLUDE_TEST=${INCLUDE_TEST} -DPYTHON=${PYTHON} -DPYTHON_WHL=${WHL} -DGO=${GO} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} .. - else - cmake -DINCLUDE_TEST=${INCLUDE_TEST} -DPYTHON=${PYTHON} -DPYTHON_WHL=${WHL} -DGO=${GO} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DPYTHON_KPERF=${PYTHON_EXE} .. + # Remove the PYTHON_KPERF && PYTHON_WHL warning + CMAKE_ARGS=() + CMAKE_ARGS+=( + "-DINCLUDE_TEST=${INCLUDE_TEST}" + "-DPYTHON=${PYTHON}" + "-DGO=${GO}" + "-DCMAKE_INSTALL_PREFIX=${INSTALL_PATH}" + "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}" + ) + if [ !-z ${PYTHON_EXE} ];then + CMAKE_ARGS+=("-DPYTHON_KPERF=${PYTHON_EXE}") fi + if [ ${PYTHON} ];then + CMAKE_ARGS+=("-DPYTHON_WHL=${WHL}") + fi + cmake "${CMAKE_ARGS[@]}" .. make -j ${cpu_core_num} make install echo "build libkperf success" -- Gitee From ab5989024c10d3ddf7885c27757c4b375bfb4df3 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Thu, 15 May 2025 12:22:34 +0800 Subject: [PATCH 13/35] fix build.sh PYTHON_WHL & PYTHON_EXE condition bug --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index b4bb419..6fb1300 100644 --- a/build.sh +++ b/build.sh @@ -111,10 +111,10 @@ build_libkperf() "-DCMAKE_INSTALL_PREFIX=${INSTALL_PATH}" "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}" ) - if [ !-z ${PYTHON_EXE} ];then + if [ ! -z ${PYTHON_EXE} ];then CMAKE_ARGS+=("-DPYTHON_KPERF=${PYTHON_EXE}") fi - if [ ${PYTHON} ];then + if [ "${PYTHON}" = "true" ];then CMAKE_ARGS+=("-DPYTHON_WHL=${WHL}") fi cmake "${CMAKE_ARGS[@]}" .. 
-- Gitee From e606ebb7efcd4938f23edd8448bdab68f893e688 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Thu, 15 May 2025 15:56:38 +0800 Subject: [PATCH 14/35] fix setup.py.in _libkperf path bug --- python/modules/CMakeLists.txt | 11 +++++++---- python/modules/setup.py.in | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/modules/CMakeLists.txt b/python/modules/CMakeLists.txt index 43c0f9d..7b8bafc 100644 --- a/python/modules/CMakeLists.txt +++ b/python/modules/CMakeLists.txt @@ -16,16 +16,19 @@ project(python_libkperf) set(LIBKPERF_PATH ${CMAKE_BINARY_DIR}/pmu/libkperf.so) set(LIBSYM_PATH ${CMAKE_BINARY_DIR}/symbol/libsym.so) -configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in - ${CMAKE_CURRENT_SOURCE_DIR}/setup.py -) if(DEFINED PYTHON_WHL AND PYTHON_WHL) set(BIN_TYPE "bdist_wheel") + set(SETUP_LIBKPERF_PATH "/_libkperf") else() set(BIN_TYPE "install") + set(SETUP_LIBKPERF_PATH "_libkperf") endif() +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in + ${CMAKE_CURRENT_SOURCE_DIR}/setup.py +) + add_custom_target(${PROJECT_NAME} ALL COMMAND ${PYTHON_EXECUTABLE} setup.py ${BIN_TYPE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/python/modules/setup.py.in b/python/modules/setup.py.in index 5e10f04..7e9af74 100644 --- a/python/modules/setup.py.in +++ b/python/modules/setup.py.in @@ -16,11 +16,12 @@ from setuptools import setup, find_packages libkperf_path = '@LIBKPERF_PATH@' libsym_path = '@LIBSYM_PATH@' +_libkperf_path = '@SETUP_LIBKPERF_PATH@' setup( name='libkperf', version='1.0', packages=find_packages(), - data_files=[('/_libkperf', [libkperf_path, libsym_path])] + data_files=[(_libkperf_path, [libkperf_path, libsym_path])] ) -- Gitee From c31860cbae762097cad5df0995429287568bcf15 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 19 May 2025 09:25:21 +0800 Subject: [PATCH 15/35] collect ddr bandwidth by channel --- docs/test_pr.md | 0 go/src/libkperf/kperf/kperf.go | 19 +++++++ 
include/pmu.h | 9 +++- pmu/pmu_metric.cpp | 95 +++++++++++++++++++++++++++++++-- python/modules/_libkperf/Pmu.py | 49 ++++++++++++++++- python/modules/kperf/pmu.py | 7 +++ python/tests/test_metric.py | 30 +++-------- test/test_perf/test_metric.cpp | 53 ++++-------------- 8 files changed, 188 insertions(+), 74 deletions(-) delete mode 100644 docs/test_pr.md diff --git a/docs/test_pr.md b/docs/test_pr.md deleted file mode 100644 index e69de29..0000000 diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 2b5958c..bc57e94 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -37,6 +37,9 @@ struct MetricDataExt { unsigned coreId; unsigned clusterId; char* bdf; + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; }; void SetPeriod(struct PmuAttr* attr, unsigned period) { @@ -122,6 +125,11 @@ void IPmuGetMetricDataExt(struct PmuDeviceData* deviceData, struct MetricDataExt case PMU_METRIC_CLUSTER: metricData->clusterId = deviceData->clusterId; break; + case PMU_METRIC_CHANNEL: + metricData->channelId = deviceData->channelId; + metricData->ddrNumaId = deviceData->ddrNumaId; + metricData->socketId = deviceData->socketId; + break; } } @@ -291,6 +299,7 @@ var ( PMU_METRIC_NUMA C.enum_PmuMetricMode = C.PMU_METRIC_NUMA PMU_METRIC_CLUSTER C.enum_PmuMetricMode = C.PMU_METRIC_CLUSTER PMU_METRIC_BDF C.enum_PmuMetricMode = C.PMU_METRIC_BDF + PMU_METRIC_CHANNEL C.enum_PmuMetricMode = C.PMU_METRIC_CHANNEL ) var fdModeMap map[int]C.enum_PmuTaskType = make(map[int]C.enum_PmuTaskType) @@ -396,6 +405,12 @@ type PmuDeviceAttr struct { Bdf string } +type DdrDataStructure struct { + ChannelId uint32 + DdrNumaId uint32 + SocketId uint32 +} + type PmuDeviceData struct { Metric C.enum_PmuDeviceMetric // The metric value. The meaning of value depends on metric type. 
@@ -406,6 +421,7 @@ type PmuDeviceData struct { NumaId uint32 // for pernuma metric ClusterId uint32 // for percluster metric Bdf string // for perpcie metric + DdrDataStructure // for perchannel metric } type PmuDeviceDataVo struct { @@ -983,6 +999,9 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat goDeviceList[i].NumaId = uint32(metricDataExt.numaId) goDeviceList[i].ClusterId = uint32(metricDataExt.clusterId) goDeviceList[i].Bdf = C.GoString(metricDataExt.bdf) + goDeviceList[i].ChannelId = uint32(metricDataExt.channelId) + goDeviceList[i].DdrNumaId = uint32(metricDataExt.ddrNumaId) + goDeviceList[i].SocketId = uint32(metricDataExt.socketId) } res.GoDeviceData = goDeviceList res.cDeviceData = metricData diff --git a/include/pmu.h b/include/pmu.h index af9bb2a..0ad0dc8 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -463,7 +463,8 @@ enum PmuMetricMode { PMU_METRIC_CORE, PMU_METRIC_NUMA, PMU_METRIC_CLUSTER, - PMU_METRIC_BDF + PMU_METRIC_BDF, + PMU_METRIC_CHANNEL, }; /** @@ -502,6 +503,12 @@ struct PmuDeviceData { unsigned clusterId; // for perpcie metric char *bdf; + // for perchannel metric of ddr + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index eedf8dd..c7bdde2 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -86,8 +87,9 @@ namespace KUNPENG_PMU { }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW, PMU_L3_LAT}; + set pernumaMetric = {PMU_L3_LAT}; set perClusterMetric = {PMU_L3_LAT}; + set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, PMU_PCIE_RX_MWR_BW, PMU_PCIE_TX_MRD_BW, @@ -888,6 +890,11 @@ namespace KUNPENG_PMU { unsigned numaId; unsigned clusterId; char *bdf; + struct { + unsigned channelId; + unsigned ddrNumaId; + 
unsigned socketId; + }; }; }; @@ -936,7 +943,7 @@ namespace KUNPENG_PMU { switch(metric) { case PMU_DDR_READ_BW: case PMU_DDR_WRITE_BW: - return PMU_METRIC_NUMA; + return PMU_METRIC_CHANNEL; case PMU_L3_LAT: return PMU_METRIC_CLUSTER; case PMU_L3_TRAFFIC: @@ -1064,6 +1071,82 @@ namespace KUNPENG_PMU { return SUCCESS; } + //920B: Ch0~3 -> ddrc0/2/3/5 TB; Ch4~7 -> ddrc0/2/3/5 TA + //920, 920C/E: Ch0~3 -> ddrc0/1/2/3 TB; Ch4~7 -> ddrc0/1/2/3 TA + static unordered_map> DDRC_CHANNEL_MAP = { + {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, + {CHIP_TYPE::HIPB, {0, 2, 3, 5}} + }; + + static void getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + { + string devName; + string evtName; + GetDeviceName(evt, devName, evtName); + // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 + string ddrcStr = "ddrc"; + size_t ddrcPos = devName.find(ddrcStr); + size_t channelIndex = ddrcPos + ddrcStr.length(); + string ddrcIndexStr = devName.substr(channelIndex); + // find index in DDRC_CHANNEL_MAP. eg: 3_1 --> 3, corresponds to channel 2 in HIPB + size_t separatorPos = ddrcIndexStr.find("_"); + int ddrcIndex = separatorPos != string::npos ? 
stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + + unsigned channelAddNum = 0; + if((ddrNumaId & 1) == 1) { // die B, + channelAddNum = 4; + } + CHIP_TYPE chipType = GetCpuType(); //get channel index + auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; + auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); + if (it != ddrcChannelList.end()) { + size_t index = distance(ddrcChannelList.begin(), it); + channelId = index + channelAddNum; + } + } + + struct channelKeyHash { + size_t operator()(const tuple& key) const { + auto channelIdHash = hash{}(get<0>(key)); + auto ddrNumaIdHash = hash{}(get<1>(key)); + auto socketIdHash = hash{}(get<2>(key)); + return channelIdHash ^ (ddrNumaIdHash << 1) ^ (socketIdHash << 2); + } + }; + + int AggregateByChannel(const PmuDeviceMetric metric, const vector &rawData, vector &devData) + { + unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: channelId, ddrNumaId, socketId + for (auto &data : rawData) { + unsigned channelId; + getChannelId(data.evtName, data.ddrNumaId, channelId); + auto ddrDatakey = make_tuple(channelId, data.ddrNumaId, data.socketId); + auto findData = devDataByChannel.find(ddrDatakey); + if (findData == devDataByChannel.end()) { + PmuDeviceData outData; + outData.metric = data.metric; + outData.count = data.count; + outData.mode = GetMetricMode(data.metric); + outData.channelId = channelId; + outData.ddrNumaId = data.ddrNumaId; + outData.socketId = data.ddrNumaId < 2 ? 
0 : 1; // numa id 0-1 --> socket id 0; numa id 2-3 --> socket id 1 + devDataByChannel[ddrDatakey] = outData; + } else { + findData->second.count += data.count; + } + } + + vector, PmuDeviceData>> sortedVec(devDataByChannel.begin(), devDataByChannel.end()); + sort(sortedVec.begin(), sortedVec.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); + for (auto &data : sortedVec) { + devData.push_back(data.second); + } + + return SUCCESS; + } + int PcieBWAggregate(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { const auto& deviceConfig = GetDeviceMtricConfig(); @@ -1146,8 +1229,8 @@ namespace KUNPENG_PMU { {PMU_DDR_WRITE_BW, DDRBw}, {PMU_L3_TRAFFIC, L3Bw}}; map aggregateMap = { - {PMU_DDR_READ_BW, AggregateByNuma}, - {PMU_DDR_WRITE_BW, AggregateByNuma}, + {PMU_DDR_READ_BW, AggregateByChannel}, + {PMU_DDR_WRITE_BW, AggregateByChannel}, {PMU_L3_LAT, AggregateByCluster}, {PMU_PCIE_RX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_RX_MWR_BW, PcieBWAggregate}, @@ -1256,6 +1339,10 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } + if (perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + devData.ddrNumaId = pmuData[i].cpuTopo->numaId; + devData.socketId = pmuData[i].cpuTopo->socketId; + } if (IsBdfMetric(devAttr.metric)) { devData.bdf = devAttr.bdf; } diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 51512a0..0cb863f 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -467,6 +467,12 @@ class PmuDeviceAttr: pmu_device_attr.__c_pmu_device_attr = c_pmu_device_attr return pmu_device_attr +class DdrDataStructure(ctypes.Structure): + _fields_ = [ + ('channelId', ctypes.c_uint), + ('ddrNumaId', ctypes.c_uint), + ('socketId', ctypes.c_uint) + ] class CtypesPmuDeviceData(ctypes.Structure): """ @@ -479,6 +485,11 @@ class CtypesPmuDeviceData(ctypes.Structure): 
unsigned numaId; unsigned clusterId; char *bdf; + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; """ @@ -487,7 +498,8 @@ class CtypesPmuDeviceData(ctypes.Structure): ('coreId', ctypes.c_uint), ('numaId', ctypes.c_uint), ('clusterId', ctypes.c_uint), - ('bdf', ctypes.c_char_p) + ('bdf', ctypes.c_char_p), + ('_structure', DdrDataStructure) ] _fields_ = [ @@ -521,6 +533,23 @@ class CtypesPmuDeviceData(ctypes.Structure): return self._union.bdf.decode(UTF_8) return "" + @property + def channelId(self) -> int: + if self.mode == 5 and self._union._structure.channelId: # PMU_METRIC_CHANNEL + return self._union._structure.channelId + return 0 + + @property + def ddrNumaId(self) -> int: + if self.mode == 5 and self._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL + return self._union._structure.ddrNumaId + return 0 + + @property + def socketId(self) -> int: + if self.mode == 5 and self._union._structure.socketId: # PMU_METRIC_CHANNEL + return self._union._structure.socketId + return 0 class ImplPmuDeviceData: __slots__ = ['__c_pmu_device_data'] @@ -574,6 +603,24 @@ class ImplPmuDeviceData: return self.c_pmu_device_data._union.bdf.decode(UTF_8) return "" + @property + def channelId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.channelId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.channelId + return 0 + + @property + def ddrNumaId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.ddrNumaId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.ddrNumaId + return 0 + + @property + def socketId(self) -> int: + if self.mode == 5 and self.c_pmu_device_data._union._structure.socketId: # PMU_METRIC_CHANNEL + return self.c_pmu_device_data._union._structure.socketId + return 0 + @classmethod def from_c_pmu_device_data(cls, c_pmu_device_data: CtypesPmuDeviceData) -> 'ImplPmuDeviceData': pmu_device_data = cls() diff --git 
a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 975b122..e52d82d 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -173,6 +173,7 @@ class PmuMetricMode: PMU_METRIC_NUMA = 2 PMU_METRIC_CLUSTER = 3 PMU_METRIC_BDF = 4 + PMU_METRIC_CHANNEL = 5 class ImplPmuDeviceData(_libkperf.ImplPmuDeviceData): pass @@ -193,6 +194,12 @@ class PmuDeviceData(_libkperf.PmuDeviceData): unsigned numaId; // for perpcie metric char *bdf; + // for perchannel metric of ddr + struct { + unsigned channelId; + unsigned ddrNumaId; + unsigned socketId; + }; }; }; """ diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index 4a878ae..bf653ce 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -112,7 +112,8 @@ def test_get_numa_cores(): def test_collect_ddr_bandwidth(): dev_attr = [ - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW) + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_READ_BW), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) ] pd = kperf.device_open(dev_attr) print(kperf.error()) @@ -125,9 +126,10 @@ def test_collect_ddr_bandwidth(): dev_data = None dev_data = kperf.get_device_metric(ori_data, dev_attr) - assert len(dev_data) == 4 - assert dev_data[0].numaId == 0 - assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[0].count != 0 + assert dev_data[0].metric == kperf.PmuDeviceMetric.PMU_DDR_READ_BW + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_CHANNEL + assert dev_data[len(dev_data) - 1].metric == kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW print_dev_data_details(dev_data) kperf.close(pd) @@ -150,26 +152,6 @@ def test_collect_l3_latency(): print_dev_data_details(dev_data) kperf.close(pd) -def test_collect_l3_latency_and_ddr(): - dev_attr = [ - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_LAT), - kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW) - ] - pd = kperf.device_open(dev_attr) - 
print(kperf.error()) - assert pd != -1, f"Expected non-negative pd, but got {pd}" - kperf.enable(pd) - time.sleep(1) - kperf.disable(pd) - ori_data = kperf.read(pd) - assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" - - dev_data = kperf.get_device_metric(ori_data, dev_attr) - assert len(dev_data) == get_cluster_nums() + 4 - print_dev_data_details(dev_data) - kperf.close(pd) - - def test_collect_l3_traffic(): dev_attr = [ kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_L3_TRAFFIC) diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index 56bee10..d10ca39 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -104,9 +104,10 @@ TEST_F(TestMetric, GetNumaIdList) TEST_F(TestMetric, CollectDDRBandwidth) { - PmuDeviceAttr devAttr = {}; - devAttr.metric = PMU_DDR_READ_BW; - int pd = PmuDeviceOpen(&devAttr, 1); + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = PMU_DDR_READ_BW; + devAttr[1].metric = PMU_DDR_WRITE_BW; + int pd = PmuDeviceOpen(devAttr, 2); cout << Perror() << endl; ASSERT_NE(pd, -1); PmuEnable(pd); @@ -117,16 +118,11 @@ TEST_F(TestMetric, CollectDDRBandwidth) ASSERT_NE(oriLen, -1); PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, &devAttr, 1, &devData); - ASSERT_EQ(len, 4); - ASSERT_EQ(devData[0].numaId, 0); - ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[1].numaId, 1); - ASSERT_EQ(devData[1].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[2].numaId, 2); - ASSERT_EQ(devData[2].mode, PMU_METRIC_NUMA); - ASSERT_EQ(devData[3].numaId, 3); - ASSERT_EQ(devData[3].mode, PMU_METRIC_NUMA); + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + ASSERT_NE(devData[0].count, 0); + ASSERT_EQ(devData[0].mode, PMU_METRIC_CHANNEL); + ASSERT_EQ(devData[0].metric, PMU_DDR_READ_BW); + ASSERT_EQ(devData[len - 1].metric, PMU_DDR_WRITE_BW); DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); @@ -160,37 +156,6 @@ 
TEST_F(TestMetric, CollectL3Latency) PmuClose(pd); } -TEST_F(TestMetric, CollectL3LatencyAndDDR) -{ - PmuDeviceAttr devAttr[2] = {}; - devAttr[0].metric = PMU_L3_LAT; - devAttr[1].metric = PMU_DDR_WRITE_BW; - - int pd = PmuDeviceOpen(devAttr, 2); - cout << Perror() << endl; - ASSERT_NE(pd, -1); - PmuEnable(pd); - sleep(1); - PmuDisable(pd); - PmuData* oriData = nullptr; - int oriLen = PmuRead(pd, &oriData); - ASSERT_NE(oriLen, -1); - - PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); - unsigned clusterCount = GetClusterCount(); - unsigned numaCount = GetNumaNodeCount(); - ASSERT_EQ(len, clusterCount + numaCount); - ASSERT_NE(devData[0].count, 0); - ASSERT_EQ(devData[0].metric, PMU_L3_LAT); - ASSERT_EQ(devData[0].mode, PMU_METRIC_CLUSTER); - ASSERT_EQ(devData[clusterCount].metric, PMU_DDR_WRITE_BW); - ASSERT_EQ(devData[clusterCount].mode, PMU_METRIC_NUMA); - DevDataFree(devData); - PmuDataFree(oriData); - PmuClose(pd); -} - TEST_F(TestMetric, CollectL3Traffic) { PmuDeviceAttr devAttr = {}; -- Gitee From d23ca52009b55ccf35eedd84c819af73cdda8e84 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 19 May 2025 09:33:09 +0800 Subject: [PATCH 16/35] collect ddr bandwidth by channel --- include/pmu.h | 2 +- pmu/pmu_metric.cpp | 39 ++++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/pmu.h b/include/pmu.h index 0ad0dc8..17baf67 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -464,7 +464,7 @@ enum PmuMetricMode { PMU_METRIC_NUMA, PMU_METRIC_CLUSTER, PMU_METRIC_BDF, - PMU_METRIC_CHANNEL, + PMU_METRIC_CHANNEL }; /** diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index c7bdde2..8fc5d12 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -107,7 +107,7 @@ namespace KUNPENG_PMU { if (it != MetricToString.end()) { return it->second; } - return ""; + return ""; } using PMU_METRIC_PAIR = std::pair; @@ -302,7 +302,7 @@ namespace KUNPENG_PMU 
{ { CHIP_TYPE chipType = GetCpuType(); if (UNCORE_METRIC_CONFIG_MAP.find(chipType) == UNCORE_METRIC_CONFIG_MAP.end()) { - return {}; + return {}; } return UNCORE_METRIC_CONFIG_MAP.at(chipType); } @@ -1071,18 +1071,18 @@ namespace KUNPENG_PMU { return SUCCESS; } - //920B: Ch0~3 -> ddrc0/2/3/5 TB; Ch4~7 -> ddrc0/2/3/5 TA - //920, 920C/E: Ch0~3 -> ddrc0/1/2/3 TB; Ch4~7 -> ddrc0/1/2/3 TA static unordered_map> DDRC_CHANNEL_MAP = { {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, {CHIP_TYPE::HIPB, {0, 2, 3, 5}} }; - static void getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) + static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) { string devName; string evtName; - GetDeviceName(evt, devName, evtName); + if (!GetDeviceName(evt, devName, evtName)) { + return false; + } // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 string ddrcStr = "ddrc"; size_t ddrcPos = devName.find(ddrcStr); @@ -1093,34 +1093,41 @@ namespace KUNPENG_PMU { int ddrcIndex = separatorPos != string::npos ? 
stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); unsigned channelAddNum = 0; - if((ddrNumaId & 1) == 1) { // die B, + if((ddrNumaId & 1) == 1) { // channel id + 4 in sequence channelAddNum = 4; } CHIP_TYPE chipType = GetCpuType(); //get channel index + if (DDRC_CHANNEL_MAP.find(chipType) == DDRC_CHANNEL_MAP.end()) { + return false; + } auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); if (it != ddrcChannelList.end()) { size_t index = distance(ddrcChannelList.begin(), it); channelId = index + channelAddNum; + return true; } + return false; } struct channelKeyHash { size_t operator()(const tuple& key) const { - auto channelIdHash = hash{}(get<0>(key)); - auto ddrNumaIdHash = hash{}(get<1>(key)); - auto socketIdHash = hash{}(get<2>(key)); - return channelIdHash ^ (ddrNumaIdHash << 1) ^ (socketIdHash << 2); + auto socketIdHash = hash{}(get<0>(key)); + auto channelIdHash = hash{}(get<1>(key)); + auto ddrNumaIdHash = hash{}(get<2>(key)); + return socketIdHash ^ (channelIdHash << 1) ^ (ddrNumaIdHash << 2); } }; int AggregateByChannel(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { - unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: channelId, ddrNumaId, socketId + unordered_map, PmuDeviceData, channelKeyHash> devDataByChannel; //Key: socketId, channelId, ddrNumaId for (auto &data : rawData) { unsigned channelId; - getChannelId(data.evtName, data.ddrNumaId, channelId); - auto ddrDatakey = make_tuple(channelId, data.ddrNumaId, data.socketId); + if (!getChannelId(data.evtName, data.ddrNumaId, channelId)) { + continue; + } + auto ddrDatakey = make_tuple(data.socketId, channelId, data.ddrNumaId); auto findData = devDataByChannel.find(ddrDatakey); if (findData == devDataByChannel.end()) { PmuDeviceData outData; @@ -1137,7 +1144,9 @@ namespace KUNPENG_PMU { } vector, PmuDeviceData>> sortedVec(devDataByChannel.begin(), devDataByChannel.end()); - 
sort(sortedVec.begin(), sortedVec.end(), [](const auto& a, const auto& b) { + sort(sortedVec.begin(), sortedVec.end(), []( + const pair, PmuDeviceData>& a, + const pair, PmuDeviceData>& b) { return a.first < b.first; }); for (auto &data : sortedVec) { -- Gitee From e7a5dde133df7fd60c1c07f33be11c525c3acbea Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 20 May 2025 09:14:07 +0800 Subject: [PATCH 17/35] Add docs: collect ddr bandwidth by channel --- docs/Details_Usage.md | 49 ++++++++++++++++++++++++++----------------- docs/Go_API.md | 8 ++++++- docs/Python_API.md | 7 ++++++- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 815f4d8..9816c29 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -600,7 +600,7 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) 注意,该功能是针对Counting模式,因为Sampling和SPE Sampling本身就会采集子线程的数据。 ### 采集DDRC带宽 -鲲鹏上提供了DDRC的pmu设备,用于采集DDR的性能数据,比如带宽等。libkperf提供了API,用于获取每个numa的DDR带宽数据。 +鲲鹏上提供了DDRC的pmu设备,用于采集DDR的性能数据,比如带宽等。libkperf提供了API,用于获取每个channel的DDR带宽数据。 参考代码: ```c++ @@ -620,15 +620,17 @@ PmuData *oriData = nullptr; int oriLen = PmuRead(pd, &oriData); PmuDeviceData *devData = nullptr; auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); -// 对于4个numa的服务器,devData的长度为8.前4个是读带宽,后4个是写带宽。 -for (int i=0;i<4;++i) { - // numaId表示数据对应的numa节点。 +// devData的长度为2 * n (总通道数)。前n个是读带宽,后n个是写带宽。 +for (int i = 0; i < len / 2; ++i) { + // socketId表示数据对应的socket节点。 + // ddrNumaId表示数据对应的numa节点。 + // channelID表示数据对应的通道ID。 // count是距离上次采集的DDR总读/写包长,单位是Byte, // 需要除以时间间隔得到带宽(这里的时间间隔是1秒)。 - cout << "read bandwidth(" << devData[i].numaId << "): " << devData[i].count/1024/1024 << "M/s\n"; + cout << "read bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } -for (int i=4;i<8;++i) { - cout << "write bandwidth(" << devData[i].numaId << "): 
" << devData[i].count/1024/1024 << "M/s\n"; +for (int i = len / 2; i < len; ++i) { + cout << "write bandwidth(Socket: " << devData[i].socketId << " Numa: " << devData[i].ddrNumaId << " Channel: " << devData[i].channelId << "): " << devData[i].count/1024/1024 << "M/s\n"; } DevDataFree(devData); PmuDataFree(oriData); @@ -649,9 +651,9 @@ ori_data = kperf.read(pd) dev_data = kperf.get_device_metric(ori_data, dev_attr) for data in dev_data.iter: if data.metric == kperf.PmuDeviceMetric.PMU_DDR_READ_BW: - print(f"read bandwidth({data.numaId}): {data.count/1024/1024} M/s") + print(f"read bandwidth(Socket: {data.socketId} Numa: {data.ddrNumaId} Channel: {data.channelId}): {data.count/1024/1024} M/s") if data.metric == kperf.PmuDeviceMetric.PMU_DDR_WRITE_BW: - print(f"write bandwidth({data.numaId}): {data.count/1024/1024} M/s") + print(f"write bandwidth(Socket: {data.socketId} Numa: {data.ddrNumaId} Channel: {data.channelId}): {data.count/1024/1024} M/s") ``` ```go @@ -665,10 +667,10 @@ dataVo, _ := kperf.PmuRead(fd) deivceDataVo, _ := kperf.PmuGetDevMetric(dataVo, deviceAttrs) for _, v := range deivceDataVo.GoDeviceData { if v.Metric == kperf.PMU_DDR_READ_BW { - fmt.Printf("read bandwidth(%v): %v M/s\n", v.NumaId, v.Count/1024/1024) + fmt.Printf("read bandwidth(Socket: %v Numa: %v Channel: %v): %v M/s\n", v.SocketId, v.DdrNumaId, v.ChannelId, v.Count/1024/1024) } if v.Metric == kperf.PMU_DDR_WRITE_BW { - fmt.Printf("write bandwidth(%v): %v M/s\n", v.NumaId, v.Count/1024/1024) + fmt.Printf("write bandwidth(Socket: %v Numa: %v Channel: %v): %v M/s\n", v.SocketId, v.DdrNumaId, v.ChannelId, v.Count/1024/1024) } } kperf.DevDataFree(deivceDataVo) @@ -678,14 +680,23 @@ kperf.PmuClose(fd) 执行上述代码,输出的结果类似如下: ``` -read bandwidth(0): 17.32 M/s -read bandwidth(1): 5.43 M/s -read bandwidth(2): 2.83 M/s -read bandwidth(3): 4.09 M/s -write bandwidth(0): 4.35 M/s -write bandwidth(1): 2.29 M/s -write bandwidth(2): 0.84 M/s -write bandwidth(3): 0.97 M/s +read bandwidth(Socket: 0 Numa: 0 
Channel: 0): 6.08 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 1): 5.66 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 2): 6.23 M/s +read bandwidth(Socket: 0 Numa: 0 Channel: 3): 5.30 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 4): 4.21 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 5): 4.06 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 6): 3.99 M/s +read bandwidth(Socket: 0 Numa: 1 Channel: 7): 3.89 M/s +... +write bandwidth(Socket: 1 Numa: 2 Channel: 1): 1.49 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 2): 1.44 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 3): 1.39 M/s +write bandwidth(Socket: 1 Numa: 2 Channel: 4): 1.22 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 4): 1.44 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 5): 1.43 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 6): 1.40 M/s +write bandwidth(Socket: 1 Numa: 3 Channel: 7): 1.38 M/s ``` ### 采集L3 cache的时延 diff --git a/docs/Go_API.md b/docs/Go_API.md index fa1ba8f..18ca6ad 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -370,14 +370,20 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat * []PmuDeviceAttr: 指定需要聚合的指标参数 * typ PmuDeviceDataVo struct: * GoDeviceData []PmuDeviceData +* type DdrDataStructure struct { + ChannelId uint32 ddr数据的channel编号 + DdrNumaId uint32 ddr数据的numa编号 + SocketId uint32 ddr数据的socket编号 + } * type PmuDeviceData struct: * Metric C.enum_PmuDeviceMetric 采集的指标 * Count float64 指标的计数值 - * Mode C.enum_PmuMetricMode 指标的采集类型,按core、按numa还是按bdf号 + * Mode C.enum_PmuMetricMode 指标的采集类型,按core、按numa、按channel还是按bdf号 * CoreId uint32 数据的core编号 * NumaId uint32 数据的numa编号 * ClusterId uint32 簇ID * Bdf string 数据的bdf编号 + * DdrDataStructure ddr相关的统计数据 ### kperf.DevDataFree diff --git a/docs/Python_API.md b/docs/Python_API.md index 1ed8766..3234f77 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -365,14 +365,19 @@ kperf.get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) 对 * len: 数据长度 * iter: 返回iterator[ImplPmuDeviceData] * 
free: 释放当前PmuDeviceData +* class DdrDataStructure: + * channelId: ddr数据的channel编号 + * ddrNumaId: ddr数据的numa编号 + * socketId: ddr数据的socket编号 * class ImplPmuDeviceData: * metric: 采集的指标 * count:指标的计数值 - * mode: 指标的采集类型,按core、按numa还是按bdf号 + * mode: 指标的采集类型,按core、按numa、按channel还是按bdf号 * union: * coreId: 数据的core编号 * numaId: 数据的numa编号 * bdf: 数据的bdf编号 + * DdrDataStructure: ddr相关的统计数据 ### kperf.device_bdf_list -- Gitee From 4290afc99fdd924b8be1bdf67ee29f0cf888f42e Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 20 May 2025 19:41:23 +0800 Subject: [PATCH 18/35] Add llc_miss_ratio collect demo --- example/llc_miss_ratio.cpp | 346 +++++++++++++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 example/llc_miss_ratio.cpp diff --git a/example/llc_miss_ratio.cpp b/example/llc_miss_ratio.cpp new file mode 100644 index 0000000..5891f5d --- /dev/null +++ b/example/llc_miss_ratio.cpp @@ -0,0 +1,346 @@ +/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * libkperf licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Author: + * Create: 2025-05-13 + * Description: Collection capability for ddrc and l3c + * Current capability: Top-N thread sort of l3c_cache_miss ratio + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pcerrc.h" +#include "pmu.h" +#include "symbol.h" + +static std::map numaTotalDDRC; //numa Id --> total ddrc bandwidth (read + write, M/s) +static std::unordered_map numaToCpuCore; //numa Id --> cpu core ids +static std::unordered_map numaToCpuNumber; //numa Id --> length of cpu cores +static std::vector pidBoundCpus; //bounded cpus of designated pid +static unsigned numaNum = 0; //number of NUMAs + +const int FLOAT_PRECISION = 2; +const int TIME_UNIT_TRANS = 1000; + +uint64_t topNum = 0; +uint64_t duration = 0; +uint64_t period = 0; + +void totalDDRCBandwidth() +{ + PmuDeviceAttr devAttr[2]; + devAttr[0].metric = PMU_DDR_READ_BW; + devAttr[1].metric = PMU_DDR_WRITE_BW; + int pd = PmuDeviceOpen(devAttr, 2); + PmuEnable(pd); + sleep(1); + PmuData *oriData = nullptr; + int oriLen = PmuRead(pd, &oriData); + PmuDeviceData *devData = nullptr; + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + std::unordered_map stats; + for (int i = 0; i < len; ++i) { + stats[devData[i].ddrNumaId] += devData[i].count / 1024 / 1024; + } + for (const auto &entry : stats) { + int id = entry.first; + double sum = entry.second; + numaTotalDDRC[id] = sum; + } + numaNum = numaTotalDDRC.size(); + DevDataFree(devData); + PmuDataFree(oriData); + PmuDisable(pd); +} + +// get numaId --> cpu core ids +void initNumaToCoreList() +{ + unsigned *coreList; + for (unsigned i = 0; i < numaNum; ++i) { + coreList = nullptr; + int len = PmuGetNumaCore(i, &coreList); + numaToCpuCore[i] = coreList; + numaToCpuNumber[i] = len; + } +} + +// parse the CPU core range in the format "0-255" or "0-3,5" +std::vector parseCpuRange(const 
std::string &rangeStr) +{ + std::vector cpus; + std::stringstream ss(rangeStr); + std::string part; + + while(getline(ss, part, ',')) { + size_t hyphen_pos = part.find("-"); + if (hyphen_pos != std::string::npos) { + int start = std::stoi(part.substr(0, hyphen_pos)); + int end = std::stoi(part.substr(hyphen_pos + 1)); + if (start > end) { + std::cerr << "Invalid CPU range: " << part << std::endl; + } + for (int i = start; i <= end; ++i) { + cpus.push_back(i); + } + } else { + cpus.push_back(std::stoi(part)); + } + } + + std::sort(cpus.begin(), cpus.end()); + cpus.erase(unique(cpus.begin(), cpus.end()), cpus.end()); + return cpus; +} + +// get the allowed cpu list (Cpus_allowed_list) of pid from /proc/[pid]/status +std::string getCpuAffinityList(int pid) +{ + std::string path = "/proc/" + std::to_string(pid) + "/status"; + std::ifstream in(path); + if (!in.is_open()) { + std::cerr << "Not found: " << path << std::endl; + return ""; + } + std::string line; + const std::string targetKey = "Cpus_allowed_list:"; + while (getline(in, line)) { + if (line.find(targetKey) == 0) { + size_t pos = line.find("\t"); + if (pos == std::string::npos) + pos = targetKey.length(); + return line.substr(pos + 1); + } + } +} + +int getCpuCore(int pid) +{ + try { + std::string rangeStr = getCpuAffinityList(pid); + if (rangeStr == "") { + return -1; + } + pidBoundCpus = parseCpuRange(rangeStr); + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } +} + +bool hasCommonCpu(const unsigned *cpuArray, size_t arraySize, const std::vector &cpuVector) +{ + if (cpuArray == nullptr || arraySize == 0 || cpuVector.empty()) { + return false; + } + + if (arraySize < cpuVector.size()) { + std::unordered_set arraySet(cpuArray, cpuArray + arraySize); + for (const auto &cpu : cpuVector) { + if (arraySet.count(cpu) > 0) { + return true; + } + } + } else { + std::unordered_set vecSet(cpuVector.begin(), cpuVector.end()); + for (size_t i = 0; i < arraySize; ++i) { + if 
(vecSet.count(cpuArray[i]) > 0) { + return true; + } + } + } + + return false; +} + +std::string GetL3CMissPercent(unsigned llc_miss, unsigned llc_cache) +{ + std::ostringstream oss; + double ratio = llc_cache != 0 ? static_cast(llc_miss) / llc_cache * 100.0 : 0.0; + oss << std::fixed << std::setprecision(FLOAT_PRECISION) << ratio; + return oss.str(); +} + + +void PrintHotSpotGraph(const std::unordered_map> tidData) +{ + std::vector>> sortedVec(tidData.begin(), tidData.end()); + std::sort(sortedVec.begin(), sortedVec.end(), [](const auto& a, const auto& b) { + double ratioA = (a.second.second == 0) ? 0.0 : static_cast(a.second.first) / a.second.second; + double ratioB = (b.second.second == 0) ? 0.0 : static_cast(b.second.first) / b.second.second; + return ratioA > ratioB; + }); + + std::cout << std::string(100, '=') << std::endl; + std::cout << std::string(100, '-') << std::endl; + std::cout << " " << std::setw(10) << " " << std::setw(20) << std::left << "Tid" << std::setw(20) << "llc_cache_miss" + << std::setw(20) << "llc_cache" << std::setw(20) << "llc_cache_miss_ratio" << std::endl; + std::cout << std::string(100, '-') << std::endl; + + size_t outputNum = std::min(topNum, tidData.size()); + for (int i = 0; i < outputNum; ++i) { + std::cout << " " << std::setw(10) << i << std::setw(20) << std::left << sortedVec[i].first << std::setw(20) + << sortedVec[i].second.first << std::setw(20) << sortedVec[i].second.second << std::setw(20) + << GetL3CMissPercent(sortedVec[i].second.first, sortedVec[i].second.second) + "%" << std::endl; + } + + std::cout << std::string(100, '_') << std::endl; +} + +int GetPmuDataHotspot(PmuData* pmuData, int pmuDataLen) +{ + if (pmuData == nullptr || pmuDataLen == 0) { + return SUCCESS; + } + + std::unordered_map> tidData; //tid --> (0x33, 0x32) + for (int i = 0; i < pmuDataLen; ++i) { + PmuData& data = pmuData[i]; + if (strcmp(data.evt, "r33") == 0) { + tidData[data.tid].first += data.count; + } + if (strcmp(data.evt, "r32") == 0) { + 
tidData[data.tid].second += data.count; + } + } + PrintHotSpotGraph(tidData); + return SUCCESS; +} + +void collectL3CMissRatio(int pid) { + char* evtList[2]; + evtList[0] = (char*)"r33"; + evtList[1] = (char*)"r32"; + PmuAttr attr = {0}; + attr.evtList = evtList; + attr.numEvt = 2; + attr.pidList = &pid; + attr.numPid = 1; + attr.cpuList = pidBoundCpus.data(); + attr.numCpu = pidBoundCpus.size(); + + int pd = PmuOpen(COUNTING, &attr); + if (pd == -1) { + std::cerr << "PmuOpen failed" << std::endl; + std::cerr << "error msg:" << Perror() << std::endl; + return; + } + + PmuEnable(pd); + int collectTimes = duration * TIME_UNIT_TRANS / period; + for (int i = 0; i < collectTimes; ++i) { + usleep(period * TIME_UNIT_TRANS); + PmuData* pmuData = nullptr; + int len = PmuRead(pd, &pmuData); + if (len == -1) { + std::cerr << "error msg:" << Perror() << std::endl; + return; + } + GetPmuDataHotspot(pmuData, len); + PmuDataFree(pmuData); + } + PmuDisable(pd); + PmuClose(pd); + return; +} + +// g++ -o llc_miss_ratio llc_miss_ratio.cpp -I ./output/include/ -L ./output/lib/ -lkperf -lsym +// export LD_LIBRARY_PATH=/XXX/libkperf/output/lib/:$LD_LIBRARY_PATH +void print_usage() { + std::cerr << "Usage: llc_miss_ratio \n"; + std::cerr << "--threshold : the collect threshold of total ddrc bandwidth, unit M/s\n"; + std::cerr << "--topNum : the top N thread of llc miss ratio collection\n"; + std::cerr << "--duration : the total collect time of llc_miss_ratio, unit s\n"; + std::cerr << "--period : the period of llc_miss_ratio collect, unit ms\n"; + std::cerr << " example: llc_miss_ratio 100 10 10 1000 \n"; +} + +int main(int argc, char** argv) +{ + if (argc < 5) { + print_usage(); + return 0; + } + double threshold = 0.0; + int pid = 0; + bool collectL3CMissFlag = false; + + try { + threshold = std::stod(argv[1]); + if (threshold <= 0) { + throw std::invalid_argument("threshold must be a positive number."); + } + + topNum = std::stod(argv[2]); + if (topNum <= 0) { + throw 
std::invalid_argument("TopNum must be a positive number."); + } + + duration = std::stod(argv[3]); + if (duration <= 0) { + throw std::invalid_argument("Duration must be a positive number."); + } + + period = std::stoi(argv[4]); + if (period <= 0) { + throw std::invalid_argument("Period must be a positive integer."); + } + + try { + pid = std::stoi(argv[5]); + } catch (const std::invalid_argument& e) { + std::cerr << "Not valid process id: " << e.what() << "\n"; + } + } catch (const std::exception& e) { + std::cerr << "Error parsing arguments: " << e.what() << "\n"; + print_usage(); + return EXIT_FAILURE; + } + + totalDDRCBandwidth(); + initNumaToCoreList(); + if(getCpuCore(pid) == -1) { + return EXIT_FAILURE; + } + + for (const auto &data : numaTotalDDRC) { + std::cout << "Numa ID: " << data.first << ", total bandwidth: " << data.second << "M/s"; + // bandwidth of numa greater than the threshold, check whether bounded cpus of pid correspond to this numa cores + if (data.second > threshold) { + auto cpuCoreList = numaToCpuCore[data.first]; + if (hasCommonCpu(cpuCoreList, numaToCpuNumber[data.first], pidBoundCpus)) { + std::cout << " --> exceed threshold, and the process is running on this numa"; + collectL3CMissFlag = true; + } else { + std::cout << " --> exceed threshold, the process is not running on this numa"; + } + } else { + std::cout << " --> not exceed threshold"; + } + std::cout << std::endl; + } + + if (collectL3CMissFlag) { + collectL3CMissRatio(pid); //begin to collect llc_miss and llc_cache event + } + + return 0; +} \ No newline at end of file -- Gitee From ce2d825c4989751c8adf4a2852999e2728671859 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Wed, 21 May 2025 14:38:19 +0800 Subject: [PATCH 19/35] =?UTF-8?q?uncore=E4=BA=8B=E4=BB=B6umask=E8=B4=9F?= =?UTF-8?q?=E6=95=B0=E6=83=85=E5=86=B5=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/cpu_freq.cpp | 4 ++-- 
pmu/cpu_freq.h | 4 ++-- pmu/pfm/uncore.cpp | 4 ++++ util/pcerr.cpp | 5 +++++ 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pmu/cpu_freq.cpp b/pmu/cpu_freq.cpp index 2d2b033..6f5a031 100644 --- a/pmu/cpu_freq.cpp +++ b/pmu/cpu_freq.cpp @@ -90,7 +90,7 @@ int CpuFreqManager::InitCpuFreqSampling(unsigned period) { return -1; } - this->sleepPeriod = static_cast(period) / 1000; + this->sleepPeriod = period * 1000; this->cpuFreqThread = std::thread([this]() { while (!isEnd) { if (!isEnable) { @@ -109,7 +109,7 @@ int CpuFreqManager::InitCpuFreqSampling(unsigned period) { this->freqListMap.insert({cpu, freqList}); } } - sleep(this->sleepPeriod); + usleep(this->sleepPeriod); } }); hasInit = true; diff --git a/pmu/cpu_freq.h b/pmu/cpu_freq.h index e6aaf96..0e23e44 100644 --- a/pmu/cpu_freq.h +++ b/pmu/cpu_freq.h @@ -25,7 +25,7 @@ class CpuFreqManager { public: - CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(0.1) {}; + CpuFreqManager(): isEnable(false), isEnd(false), sleepPeriod(100) {}; ~CpuFreqManager() { std::lock_guard lock(initMutex); if(!hasInit) { @@ -55,7 +55,7 @@ private: std::thread cpuFreqThread; volatile bool isEnable; volatile bool isEnd; - double sleepPeriod; + unsigned int sleepPeriod; std::map> freqListMap; int CheckCpuFreqIsExist(); diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index e6fca1a..f02e079 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -61,6 +61,10 @@ static std::vector GetCpuMask(const string &devName) char maskStr[1024]; maskIn >> maskStr; + if (maskStr[0] == '0' || maskStr[0] == '-') { + return maskList; + } + char *tokStr = strtok(maskStr, ","); while (tokStr != nullptr) { if (strstr(tokStr, "-") != nullptr) { diff --git a/util/pcerr.cpp b/util/pcerr.cpp index 7d70a3e..3efc851 100644 --- a/util/pcerr.cpp +++ b/util/pcerr.cpp @@ -14,6 +14,7 @@ ******************************************************************************/ #include #include +#include #include "pcerrc.h" #include "pcerr.h" @@ 
-64,6 +65,8 @@ namespace pcerr { static std::string warnMsg = ""; static int errCode = SUCCESS; static std::string errMsg = ""; + static std::mutex errMutex; + static std::mutex warnMutex; static std::string GetCustomMsg(int code) { std::string msg; @@ -92,6 +95,7 @@ namespace pcerr { void New(int code, const std::string& msg) { + std::lock_guard lock(errMutex); errCode = code; errMsg = msg; } @@ -108,6 +112,7 @@ namespace pcerr { void SetWarn(int code, const std::string& msg) { + std::lock_guard lock(warnMutex); warnCode = code; warnMsg = msg; } -- Gitee From 7dcce8721fd31a9a82b9a6426de5c20647286590 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Thu, 22 May 2025 14:50:35 +0800 Subject: [PATCH 20/35] corrected information and spelling errors --- README.en.md | 2 +- README.md | 2 +- docs/Details_Usage.md | 4 +- docs/Go_API.md | 4 +- docs/Python_API.md | 8 +- example/llc_miss_ratio.cpp | 346 -------------------------- go/src/libkperf_test/libkperf_test.go | 2 +- pmu/pmu.cpp | 2 +- 8 files changed, 12 insertions(+), 358 deletions(-) delete mode 100644 example/llc_miss_ratio.cpp diff --git a/README.en.md b/README.en.md index 503138f..cae1f61 100644 --- a/README.en.md +++ b/README.en.md @@ -57,7 +57,7 @@ Minimum required GCC version: Minimum required Python version: -- python-3.7. +- python-3.6. 
To build a library with C API: diff --git a/README.md b/README.md index ee59dc4..e0441bc 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ v1.0: - gcc-4.8.5 和 glibc-2.17 最低依赖python版本: -- python-3.7 +- python-3.6 编译生成动态库和C的API: ```shell diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 9816c29..a8035ff 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -41,7 +41,7 @@ func main() { attr := kperf.PmuAttr{EvtList:[]string{"cycles", "branch-misses"}} pd, err := kperf.PmuOpen(kperf.COUNT, attr) if err != nil { - fmt.Printf("kperf pmuopen couting failed, expect err is nil, but is %v", err) + fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v", err) return } } @@ -331,7 +331,7 @@ func main() { attr := kperf.PmuAttr{EvtList:evtList} pd, err := kperf.PmuOpen(kperf.COUNT, attr) if err != nil { - fmt.Printf("kperf pmuopen couting failed, expect err is nil, but is %v\n", err) + fmt.Printf("kperf pmuopen counting failed, expect err is nil, but is %v\n", err) return } } diff --git a/docs/Go_API.md b/docs/Go_API.md index 18ca6ad..bc5e4d8 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -335,8 +335,8 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * type PmuDeviceAttr struct: * Metic: 指定需要采集的指标 - * PMU_DDR_READ_BW 采集每个numa的ddrc的读带宽,单位:Bytes - * PMU_DDR_WRITE_BW 采集每个numa的ddrc的写带宽,单位:Bytes + * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes + * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count diff --git a/docs/Python_API.md b/docs/Python_API.md index 3234f77..ddf8d38 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -5,7 +5,7 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) * class PmuTaskType - * COUTING PMU计数模式 + * COUNTING PMU计数模式 * SAMPLING PMU采样模式 * SPE_SAMPLING SPE采样模式 * class PmuAttr @@ -51,7 +51,7 @@ kperf.open(collector_type: 
kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) * SPE_EVENT_MISPREDICTED = 0x80 # mispredict * minLatency 仅收集该latency或者更高的样本数据 * includeNewFork - 是否支持子线程拆分,仅在COUTING模式中支持 + 是否支持子线程拆分,仅在COUNTING模式中支持 * branchSampleFilter * KPERF_NO_BRANCH_SAMPLE = 0 不采集branch sample stack数据 * KPERF_SAMPLE_BRANCH_USER = 1 << 0 分支目标位于用户空间 @@ -330,8 +330,8 @@ for func_name in kperf.sys_call_func_list(): kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指标的能力 * class PmuDeviceAttr: * metic: 指定需要采集的指标 - * PMU_DDR_READ_BW 采集每个numa的ddrc的读带宽,单位:Bytes - * PMU_DDR_WRITE_BW 采集每个numa的ddrc的写带宽,单位:Bytes + * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes + * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count diff --git a/example/llc_miss_ratio.cpp b/example/llc_miss_ratio.cpp deleted file mode 100644 index 5891f5d..0000000 --- a/example/llc_miss_ratio.cpp +++ /dev/null @@ -1,346 +0,0 @@ -/****************************************************************************** - * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. - * libkperf licensed under the Mulan PSL v2. - * You can use this software according to the terms and conditions of the Mulan PSL v2. - * You may obtain a copy of Mulan PSL v2 at: - * http://license.coscl.org.cn/MulanPSL2 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR - * PURPOSE. - * See the Mulan PSL v2 for more details. 
- * Author: - * Create: 2025-05-13 - * Description: Collection capability for ddrc and l3c - * Current capability: Top-N thread sort of l3c_cache_miss ratio - ******************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "pcerrc.h" -#include "pmu.h" -#include "symbol.h" - -static std::map numaTotalDDRC; //numa Id --> average ddrc bandwidth -static std::unordered_map numaToCpuCore; //numa Id --> cpu core ids -static std::unordered_map numaToCpuNumber; //numa Id --> length of cpu cores -static std::vector pidBoundCpus; //bounded cpus of designated pid -static unsigned numaNum = 0; //number of NUMAs - -const int FLOAT_PRECISION = 2; -const int TIME_UNIT_TRANS = 1000; - -uint64_t topNum = 0; -uint64_t duration = 0; -uint64_t period = 0; - -void totalDDRCBandwidth() -{ - PmuDeviceAttr devAttr[2]; - devAttr[0].metric = PMU_DDR_READ_BW; - devAttr[1].metric = PMU_DDR_WRITE_BW; - int pd = PmuDeviceOpen(devAttr, 2); - PmuEnable(pd); - sleep(1); - PmuData *oriData = nullptr; - int oriLen = PmuRead(pd, &oriData); - PmuDeviceData *devData = nullptr; - auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); - std::unordered_map stats; - for (int i = 0; i < len; ++i) { - stats[devData[i].ddrNumaId] += devData[i].count / 1024 / 1024; - } - for (const auto &entry : stats) { - int id = entry.first; - double sum = entry.second; - numaTotalDDRC[id] = sum; - } - numaNum = numaTotalDDRC.size(); - DevDataFree(devData); - PmuDataFree(oriData); - PmuDisable(pd); -} - -// get numaId --> cpu core ids -void initNumaToCoreList() -{ - unsigned *coreList; - for (unsigned i = 0; i < numaNum; ++i) { - coreList = nullptr; - int len = PmuGetNumaCore(i, &coreList); - numaToCpuCore[i] = coreList; - numaToCpuNumber[i] = len; - } -} - -// parse the CPU core range in the format "0-255" or "0-3,5" -std::vector parseCpuRange(const 
std::string &rangeStr) -{ - std::vector cpus; - std::stringstream ss(rangeStr); - std::string part; - - while(getline(ss, part, ',')) { - size_t hyphen_pos = part.find("-"); - if (hyphen_pos != std::string::npos) { - int start = std::stoi(part.substr(0, hyphen_pos)); - int end = std::stoi(part.substr(hyphen_pos + 1)); - if (start > end) { - std::cerr << "Invalid CPU range: " << part << std::endl; - } - for (int i = start; i <= end; ++i) { - cpus.push_back(i); - } - } else { - cpus.push_back(std::stoi(part)); - } - } - - std::sort(cpus.begin(), cpus.end()); - cpus.erase(unique(cpus.begin(), cpus.end()), cpus.end()); - return cpus; -} - -// get cpu core of pid from /proc/[pid]/stat -std::string getCpuAffinityList(int pid) -{ - std::string path = "/proc/" + std::to_string(pid) + "/status"; - std::ifstream in(path); - if (!in.is_open()) { - std::cerr << "Not found: " << path << std::endl; - return ""; - } - std::string line; - const std::string targetKey = "Cpus_allowed_list:"; - while (getline(in, line)) { - if (line.find(targetKey) == 0) { - size_t pos = line.find("\t"); - if (pos == std::string::npos) - pos = targetKey.length(); - return line.substr(pos + 1); - } - } -} - -int getCpuCore(int pid) -{ - try { - std::string rangeStr = getCpuAffinityList(pid); - if (rangeStr == "") { - return -1; - } - pidBoundCpus = parseCpuRange(rangeStr); - } catch (const std::exception &e) { - std::cerr << "Error: " << e.what() << std::endl; - return 1; - } -} - -bool hasCommonCpu(const unsigned *cpuArray, size_t arraySize, const std::vector &cpuVector) -{ - if (cpuArray == nullptr || arraySize == 0 || cpuVector.empty()) { - return false; - } - - if (arraySize < cpuVector.size()) { - std::unordered_set arraySet(cpuArray, cpuArray + arraySize); - for (const auto &cpu : cpuVector) { - if (arraySet.count(cpu) > 0) { - return true; - } - } - } else { - std::unordered_set vecSet(cpuVector.begin(), cpuVector.end()); - for (size_t i = 0; i < arraySize; ++i) { - if 
(vecSet.count(cpuArray[i]) > 0) { - return true; - } - } - } - - return false; -} - -std::string GetL3CMissPercent(unsigned llc_miss, unsigned llc_cache) -{ - std::ostringstream oss; - double ratio = llc_cache != 0 ? static_cast(llc_miss) / llc_cache * 100.0 : 0.0; - oss << std::fixed << std::setprecision(FLOAT_PRECISION) << ratio; - return oss.str(); -} - - -void PrintHotSpotGraph(const std::unordered_map> tidData) -{ - std::vector>> sortedVec(tidData.begin(), tidData.end()); - std::sort(sortedVec.begin(), sortedVec.end(), [](const auto& a, const auto& b) { - double ratioA = (a.second.second == 0) ? 0.0 : static_cast(a.second.first) / a.second.second; - double ratioB = (b.second.second == 0) ? 0.0 : static_cast(b.second.first) / b.second.second; - return ratioA > ratioB; - }); - - std::cout << std::string(100, '=') << std::endl; - std::cout << std::string(100, '-') << std::endl; - std::cout << " " << std::setw(10) << " " << std::setw(20) << std::left << "Tid" << std::setw(20) << "llc_cache_miss" - << std::setw(20) << "llc_cache" << std::setw(20) << "llc_cache_miss_ratio" << std::endl; - std::cout << std::string(100, '-') << std::endl; - - size_t outputNum = std::min(topNum, tidData.size()); - for (int i = 0; i < outputNum; ++i) { - std::cout << " " << std::setw(10) << i << std::setw(20) << std::left << sortedVec[i].first << std::setw(20) - << sortedVec[i].second.first << std::setw(20) << sortedVec[i].second.second << std::setw(20) - << GetL3CMissPercent(sortedVec[i].second.first, sortedVec[i].second.second) + "%" << std::endl; - } - - std::cout << std::string(100, '_') << std::endl; -} - -int GetPmuDataHotspot(PmuData* pmuData, int pmuDataLen) -{ - if (pmuData == nullptr || pmuDataLen == 0) { - return SUCCESS; - } - - std::unordered_map> tidData; //tid --> (0x33, 0x32) - for (int i = 0; i < pmuDataLen; ++i) { - PmuData& data = pmuData[i]; - if (strcmp(data.evt, "r33") == 0) { - tidData[data.tid].first += data.count; - } - if (strcmp(data.evt, "r32") == 0) { - 
tidData[data.tid].second += data.count; - } - } - PrintHotSpotGraph(tidData); - return SUCCESS; -} - -void collectL3CMissRatio(int pid) { - char* evtList[2]; - evtList[0] = (char*)"r33"; - evtList[1] = (char*)"r32"; - PmuAttr attr = {0}; - attr.evtList = evtList; - attr.numEvt = 2; - attr.pidList = &pid; - attr.numPid = 1; - attr.cpuList = pidBoundCpus.data(); - attr.numCpu = pidBoundCpus.size(); - - int pd = PmuOpen(COUNTING, &attr); - if (pd == -1) { - std::cerr << "PmuOpen failed" << std::endl; - std::cerr << "error msg:" << Perror() << std::endl; - return; - } - - PmuEnable(pd); - int collectTimes = duration * TIME_UNIT_TRANS / period; - for (int i = 0; i < collectTimes; ++i) { - usleep(period * TIME_UNIT_TRANS); - PmuData* pmuData = nullptr; - int len = PmuRead(pd, &pmuData); - if (len == -1) { - std::cerr << "error msg:" << Perror() << std::endl; - return; - } - GetPmuDataHotspot(pmuData, len); - PmuDataFree(pmuData); - } - PmuDisable(pd); - PmuClose(pd); - return; -} - -// g++ -o llc_miss_ratio llc_miss_ratio.cpp -I ./output/include/ -L ./output/lib/ -lkperf -lsym -// export LD_LIBRARY_PATH=/XXX/libkperf/output/lib/:$LD_LIBRARY_PATH -void print_usage() { - std::cerr << "Usage: llc_miss_ratio \n"; - std::cerr << "--threshold : the collect threshold of total ddrc bandwidth, unit M/s\n"; - std::cerr << "--topNum : the top N thread of llc miss ratio collection\n"; - std::cerr << "--duration : the total collect time of llc_miss_ratio, unit s\n"; - std::cerr << "--period : the period of llc_miss_ratio collect, unit ms\n"; - std::cerr << " example: llc_miss_ratio 100 10 10 1000 \n"; -} - -int main(int argc, char** argv) -{ - if (argc < 5) { - print_usage(); - return 0; - } - double threshold = 0.0; - int pid = 0; - bool collectL3CMissFlag = false; - - try { - threshold = std::stod(argv[1]); - if (threshold <= 0) { - throw std::invalid_argument("threshold must be a positive number."); - } - - topNum = std::stod(argv[2]); - if (topNum <= 0) { - throw 
std::invalid_argument("TopNum must be a positive number."); - } - - duration = std::stod(argv[3]); - if (duration <= 0) { - throw std::invalid_argument("Duration must be a positive number."); - } - - period = std::stoi(argv[4]); - if (period <= 0) { - throw std::invalid_argument("Period must be a positive integer."); - } - - try { - pid = std::stoi(argv[5]); - } catch (const std::invalid_argument& e) { - std::cerr << "Not valid process id: " << e.what() << "\n"; - } - } catch (const std::exception& e) { - std::cerr << "Error parsing arguments: " << e.what() << "\n"; - print_usage(); - return EXIT_FAILURE; - } - - totalDDRCBandwidth(); - initNumaToCoreList(); - if(getCpuCore(pid) == -1) { - return EXIT_FAILURE; - } - - for (const auto &data : numaTotalDDRC) { - std::cout << "Numa ID: " << data.first << ", total bandwidth: " << data.second << "M/s"; - // bandwidth of numa greater than the threshold, check whether bounded cpus of pid correspond to this numa cores - if (data.second > threshold) { - auto cpuCoreList = numaToCpuCore[data.first]; - if (hasCommonCpu(cpuCoreList, numaToCpuNumber[data.first], pidBoundCpus)) { - std::cout << " --> exceed threshold, and the process is running on this numa"; - collectL3CMissFlag = true; - } else { - std::cout << " --> exceed threshold, the process is not running on this numa"; - } - } else { - std::cout << " --> not exceed threshold"; - } - std::cout << std::endl; - } - - if (collectL3CMissFlag) { - collectL3CMissRatio(pid); //begin to collect llc_miss and llc_cache event - } - - return 0; -} \ No newline at end of file diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index 2f55951..e64ea6a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -22,7 +22,7 @@ func TestCount(t *testing.T) { } for _, o := range dataVo.GoData { - t.Logf("================================Get Couting data success================================") + 
t.Logf("================================Get Counting data success================================") t.Logf("count base info comm=%v, evt=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, sockedId=%v", o.Comm, o.Evt, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) t.Logf("count info count=%v, countPercent=%v", o.Count, o.CountPercent) } diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index c033dfc..4d55b86 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -176,7 +176,7 @@ static int CheckCollectTypeConfig(enum PmuTaskType collectType, struct PmuAttr * } #ifdef IS_X86 if (collectType != COUNTING && collectType != SAMPLING) { - New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUTING mode and SMAPLING mode"); + New(LIBPERF_ERR_INVALID_TASK_TYPE, "The x86 architecture supports only the COUNTING mode and SMAPLING mode"); return LIBPERF_ERR_INVALID_TASK_TYPE; } #endif -- Gitee From 4dcf412ab8ac4cacd67680bf4b1c23d0eb6aafbb Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 22 May 2025 17:30:46 +0800 Subject: [PATCH 21/35] =?UTF-8?q?=E5=8E=BB=E9=99=A4umask=3D0=E6=97=B6?= =?UTF-8?q?=E7=9A=84=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pfm/uncore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pmu/pfm/uncore.cpp b/pmu/pfm/uncore.cpp index f02e079..6ae72cc 100644 --- a/pmu/pfm/uncore.cpp +++ b/pmu/pfm/uncore.cpp @@ -61,7 +61,7 @@ static std::vector GetCpuMask(const string &devName) char maskStr[1024]; maskIn >> maskStr; - if (maskStr[0] == '0' || maskStr[0] == '-') { + if (maskStr[0] == '-') { return maskList; } -- Gitee From 6beec781c64c1ba6a16687eb3da1742933f8d9f3 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 23 May 2025 11:10:46 +0800 Subject: [PATCH 22/35] =?UTF-8?q?=E5=A2=9E=E5=8A=A0tracepointer=E5=88=A4?= =?UTF-8?q?=E6=96=AD,=E5=87=8F=E5=B0=91=E6=96=87=E4=BB=B6=E5=88=A4?= =?UTF-8?q?=E6=96=AD?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu.cpp | 6 +++--- pmu/pmu_list.cpp | 6 +++--- pmu/sampler.cpp | 7 +++++-- ...pointer_parser.cpp => trace_point_parser.cpp} | 16 ++++++++-------- ...ace_pointer_parser.h => trace_point_parser.h} | 12 ++++++------ 5 files changed, 25 insertions(+), 22 deletions(-) rename pmu/{trace_pointer_parser.cpp => trace_point_parser.cpp} (94%) rename pmu/{trace_pointer_parser.h => trace_point_parser.h} (89%) diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 4d55b86..1fd80ea 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -27,7 +27,7 @@ #include "pcerr.h" #include "safe_handler.h" #include "pmu_metric.h" -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" #include "pmu.h" using namespace pcerr; @@ -972,7 +972,7 @@ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *valu New(LIBPERF_ERR_INVALID_FIELD_ARGS, "rawData cannot be nullptr."); return LIBPERF_ERR_INVALID_FIELD_ARGS; } - return PointerPasser::ParsePointer(rawData->data, fieldName, value, vSize); + return TraceParser::ParseTraceData(rawData->data, fieldName, value, vSize); #endif } @@ -986,7 +986,7 @@ struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char return nullptr; } - SampleRawField *rt = PointerPasser::GetSampleRawField(rawData->data, fieldName); + SampleRawField *rt = TraceParser::GetSampleRawField(rawData->data, fieldName); if (rt) { New(SUCCESS); } diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 360b875..c0470c0 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -23,7 +23,7 @@ #include "pcerr.h" #include "util_time.h" #include "log.h" -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" #include "pmu_event_list.h" #include "pmu_list.h" #include "pfm_event.h" @@ -457,7 +457,7 @@ namespace KUNPENG_PMU { EraseParentEventMap(pd); SymResolverDestroy(); PmuEventListFree(); - PointerPasser::FreeRawFieldMap(); + TraceParser::FreeRawFieldMap(); } int 
PmuList::NewPd() @@ -757,7 +757,7 @@ namespace KUNPENG_PMU { for (auto pd: findData->second.data) { if (pd.rawData != nullptr) { - PointerPasser::FreePointerData(pd.rawData->data); + TraceParser::FreeTraceData(pd.rawData->data); free(pd.rawData); pd.rawData = nullptr; } diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 6cfd684..a12e709 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -31,7 +31,8 @@ #include "process_map.h" #include "log.h" #include "sampler.h" -#include "trace_pointer_parser.h" +#include "pfm_event.h" +#include "trace_point_parser.h" #include "common.h" using namespace std; @@ -239,7 +240,9 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( current->tid = static_cast(sample->tid); current->period = static_cast(sample->period); current->ts = static_cast(sample->time); - PointerPasser::ParserRawFormatData(current, sample, event, this->evt->name); + if (this->evt->pmuType == TRACE_TYPE) { + TraceParser::ParserRawFormatData(current, sample, event, this->evt->name); + } ParseBranchSampleData(current, sample, event, extPool); } diff --git a/pmu/trace_pointer_parser.cpp b/pmu/trace_point_parser.cpp similarity index 94% rename from pmu/trace_pointer_parser.cpp rename to pmu/trace_point_parser.cpp index 0e84fda..3e2b79b 100644 --- a/pmu/trace_pointer_parser.cpp +++ b/pmu/trace_point_parser.cpp @@ -13,7 +13,7 @@ * Description: Provides the capability of parsing pointer events. 
******************************************************************************/ -#include "trace_pointer_parser.h" +#include "trace_point_parser.h" using namespace KUNPENG_PMU; using namespace pcerr; @@ -41,7 +41,7 @@ static std::string GetFormatRealPath(const std::string &evtName) { return GetRealPath(formatPath); } -bool PointerPasser::IsNeedFormat(std::ifstream &file, const std::string &evtName) { +bool TraceParser::IsNeedFormat(std::ifstream &file, const std::string &evtName) { std::string realPath; if (formatMap.find(evtName) != formatMap.end()) { realPath = formatMap.at(evtName); @@ -90,7 +90,7 @@ void ParseFormatFile(ifstream &file, const std::string &evtName) { efMap.insert({evtName, fnMap}); } -void PointerPasser::ParserRawFormatData(struct PmuData *pd, KUNPENG_PMU::PerfRawSample *sample, +void TraceParser::ParserRawFormatData(struct PmuData *pd, KUNPENG_PMU::PerfRawSample *sample, union KUNPENG_PMU::PerfEvent *event, const std::string &evtName) { ifstream file; @@ -171,7 +171,7 @@ int CheckFieldArgs(char *data, const string &fieldName, T *value, uint32_t vSize } template -int PointerPasser::ParseField(char *data, const std::string &fieldName, T *value, uint32_t vSize) { +int TraceParser::ParseField(char *data, const std::string &fieldName, T *value, uint32_t vSize) { int rt = CheckFieldArgs(data, fieldName, value, vSize); if (rt != SUCCESS) { return rt; @@ -201,12 +201,12 @@ int PointerPasser::ParseField(char *data, const std::string &fieldName, T *value return SUCCESS; } -int PointerPasser::ParsePointer(char *data, const std::string &fieldName, void *value, +int TraceParser::ParseTraceData(char *data, const std::string &fieldName, void *value, uint32_t vSize) { return ParseField(data, fieldName, value, vSize); } -void PointerPasser::FreePointerData(char *data) { +void TraceParser::FreeTraceData(char *data) { if (data == nullptr) { return; } @@ -217,7 +217,7 @@ void PointerPasser::FreePointerData(char *data) { data = nullptr; } -SampleRawField 
*PointerPasser::GetSampleRawField(char *data, const std::string &fieldName) { +SampleRawField *TraceParser::GetSampleRawField(char *data, const std::string &fieldName) { int ret = CheckFieldArgs(data, fieldName); if (ret != SUCCESS) { return nullptr; @@ -238,7 +238,7 @@ SampleRawField *PointerPasser::GetSampleRawField(char *data, const std::string & return fsrMap.at(field); } -void PointerPasser::FreeRawFieldMap() { +void TraceParser::FreeRawFieldMap() { for (auto it = fsrMap.begin(); it != fsrMap.end(); ++it) { if (!it->second) { continue; diff --git a/pmu/trace_pointer_parser.h b/pmu/trace_point_parser.h similarity index 89% rename from pmu/trace_pointer_parser.h rename to pmu/trace_point_parser.h index 701836f..a101d31 100644 --- a/pmu/trace_pointer_parser.h +++ b/pmu/trace_point_parser.h @@ -12,8 +12,8 @@ * Create: 2024-07-04 * Description: Provides the capability of parsing pointer events. ******************************************************************************/ -#ifndef LIBKPERF_TRACE_POINTER_PARSER_H -#define LIBKPERF_TRACE_POINTER_PARSER_H +#ifndef LIBKPERF_TRACE_POINT_PARSER_H +#define LIBKPERF_TRACE_POINT_PARSER_H #include #include @@ -46,7 +46,7 @@ namespace KUNPENG_PMU { } }; - class PointerPasser { + class TraceParser { public: /** * @brief determine whether the event is a pointer event. @@ -68,12 +68,12 @@ namespace KUNPENG_PMU { /** * @brief the method of parsing field. */ - static int ParsePointer(char *data, const string &fieldName, void *value, uint32_t vSize); + static int ParseTraceData(char *data, const string &fieldName, void *value, uint32_t vSize); /** * @brief free the data. */ - static void FreePointerData(char *data); + static void FreeTraceData(char *data); /** * @brief get the field named fieldName of this event. 
@@ -89,4 +89,4 @@ namespace KUNPENG_PMU { } -#endif //LIBKPERF_TRACE_POINTER_PARSER_H +#endif //LIBKPERF_TRACE_POINT_PARSER_H -- Gitee From ea4ca6528396f311914a7d181ac2677dd0b43561 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 26 May 2025 09:43:17 +0800 Subject: [PATCH 23/35] corrected spelling errors --- docs/Python_API.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Python_API.md b/docs/Python_API.md index ddf8d38..de323e9 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -329,7 +329,7 @@ for func_name in kperf.sys_call_func_list(): kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指标的能力 * class PmuDeviceAttr: - * metic: 指定需要采集的指标 + * metric: 指定需要采集的指标 * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes -- Gitee From ede82725c721325e69224e74b9921831fe5236ab Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 26 May 2025 11:00:13 +0800 Subject: [PATCH 24/35] collected spelling error --- docs/Go_API.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Go_API.md b/docs/Go_API.md index 26ae464..7199142 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -334,7 +334,7 @@ func main() { func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事件指标的能力 * type PmuDeviceAttr struct: - * Metic: 指定需要采集的指标 + * Metric: 指定需要采集的指标 * PMU_DDR_READ_BW 采集每个channel的ddrc的读带宽,单位:Bytes * PMU_DDR_WRITE_BW 采集每个channel的ddrc的写带宽,单位:Bytes * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes -- Gitee From 2a5e6bd573b08231a29eff45413e9dfb78488637 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Mon, 26 May 2025 16:39:35 +0800 Subject: [PATCH 25/35] =?UTF-8?q?unknow=20error=20=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E4=BF=A1=E6=81=AF=E8=AE=BE=E7=BD=AE=E4=BB=A5=E5=8F=8ApmuAppend?= =?UTF-8?q?Data=20=E6=9F=A5=E8=AF=A2=E5=88=A4=E6=96=AD=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/evt_list.cpp | 7 ++++++- pmu/pmu_list.cpp | 2 +- pmu/pmu_metric.cpp | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index 2af3e2f..bee7fa8 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -97,10 +97,10 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrIsMainPid()) { continue; } + if (err == LIBPERF_ERR_INVALID_EVENT) { if (branchSampleFilter != KPERF_NO_BRANCH_SAMPLE) { pcerr::SetCustomErr(err, "Invalid event:" + perfEvt->GetEvtName() + ", PMU Hardware or event type doesn't support branch stack sampling"); @@ -108,6 +108,11 @@ int KUNPENG_PMU::EvtList::Init(const bool groupEnable, const std::shared_ptrGetEvtName() + ", " + std::string{strerror(errno)}); } } + + if (err == UNKNOWN_ERROR) { + pcerr::SetCustomErr(err, std::string{strerror(errno)}); + } + return err; } fdList.insert(perfEvt->GetFd()); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index c0470c0..31cc35e 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -413,7 +413,7 @@ namespace KUNPENG_PMU { } auto findToData = userDataList.find(*toData); - if (findFromData == userDataList.end()) { + if (findToData == userDataList.end()) { return LIBPERF_ERR_INVALID_PMU_DATA; } // For non-null target data list, append source list to end of target vector. diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 8fc5d12..d196c2f 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -1565,7 +1565,7 @@ int64_t PmuGetCpuFreq(unsigned core) cpuPath << SYS_CPU_INFO_PATH << core << "/cpufreq/scaling_cur_freq"; if (!ExistPath(cpuPath.str())) { - New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq Or core exceed cpuNums. Not exist " + cpuPath.str()); + New(LIBPERF_ERR_CPUFREQ_NOT_CONFIG, "Kernel not config cpuFreq or core exceed cpuNums. 
Not exist " + cpuPath.str()); return -1; } std::string curFreqStr = ReadFileContent(cpuPath.str()); -- Gitee From fce16d3fcd91b17a5d1ad7e440e9956d95f32e70 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 23 May 2025 16:16:59 +0800 Subject: [PATCH 26/35] the logic of symbol parsing is optimized for NO_SYMBOL_RESOLVE --- include/pcerrc.h | 1 + include/pmu.h | 8 ++++++ pmu/pmu.cpp | 5 ++++ pmu/pmu_list.cpp | 39 +++++++++++++++++++++----- pmu/pmu_list.h | 1 + pmu/sampler.cpp | 26 ++++++++---------- pmu/spe.cpp | 4 +-- python/modules/kperf/perror.py | 1 + symbol/symbol_resolve.cpp | 50 +++++++++++++++------------------- 9 files changed, 84 insertions(+), 51 deletions(-) diff --git a/include/pcerrc.h b/include/pcerrc.h index 259befa..909ae4e 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -109,6 +109,7 @@ extern "C" { #define LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 1065 #define LIBPERF_ERR_NOT_SUPPORT_METRIC 1066 #define LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD 1067 +#define LIBPERF_ERR_PMU_DATA_NO_FOUND 1068 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index 747259d..e57c7cf 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -352,6 +352,14 @@ void PmuStop(int pd); */ int PmuRead(int pd, struct PmuData** pmuData); +/** +* @brief +* When symbol mode is SNO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +* @param pmuData the data from PmuRead +* @return 0 indicates resolve success, otherwise return error code +*/ +int ResolvePmuDataSymbol(struct PmuData* pmuData); + /** * @brief * Append data list to another data list <*toData>. 
diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index c033dfc..c4b979a 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -769,6 +769,11 @@ int PmuRead(int pd, struct PmuData** pmuData) } } +int ResolvePmuDataSymbol(struct PmuData* pmuData) +{ + return PmuList::GetInstance()->ResolvePmuDataSymbol(pmuData); +} + void PmuClose(int pd) { SetWarn(SUCCESS); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 360b875..86b491f 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -418,7 +418,9 @@ namespace KUNPENG_PMU { } // For non-null target data list, append source list to end of target vector. auto& dataVec = findToData->second.data; + auto& ipsVec = findToData->second.sampleIps; dataVec.insert(dataVec.end(), findFromData->second.data.begin(), findFromData->second.data.end()); + ipsVec.insert(ipsVec.end(), findFromData->second.sampleIps.begin(), findFromData->second.sampleIps.end()); len = dataVec.size(); if (*toData != dataVec.data()) { @@ -625,9 +627,6 @@ namespace KUNPENG_PMU { void PmuList::FillStackInfo(EventData& eventData) { auto symMode = symModeList[eventData.pd]; - if (symMode == NO_SYMBOL_RESOLVE) { - return; - } // Parse dwarf and elf info of each pid and get stack trace for each pmu data. 
for (size_t i = 0; i < eventData.data.size(); ++i) { auto& pmuData = eventData.data[i]; @@ -636,15 +635,44 @@ namespace KUNPENG_PMU { SymResolverRecordModuleNoDwarf(pmuData.pid); } else if (symMode == RESOLVE_ELF_DWARF) { SymResolverRecordModule(pmuData.pid); + } else if (symMode == NO_SYMBOL_RESOLVE) { + SymResolverRecordModule(pmuData.pid); + continue; } else { continue; } + if (pmuData.stack == nullptr) { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } } + int PmuList::ResolvePmuDataSymbol(struct PmuData* iPmuData) + { + if (iPmuData == nullptr) { + New(LIBPERF_ERR_INVALID_PMU_DATA, "ipmuData is nullptr"); + return LIBPERF_ERR_INVALID_PMU_DATA; + } + auto userData = userDataList.find(iPmuData); + if (userData == userDataList.end()) { + New(LIBPERF_ERR_PMU_DATA_NO_FOUND, "ipmuData isn't in userDataList"); + return LIBPERF_ERR_PMU_DATA_NO_FOUND; + } + + auto& eventData = userDataList[iPmuData]; + auto symMode = symModeList[eventData.pd]; + for (size_t i = 0; i < eventData.data.size(); ++i) { + auto& pmuData = eventData.data[i]; + auto& ipsData = eventData.sampleIps[i]; + if (pmuData.stack == nullptr) { + pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); + } + } + New(SUCCESS); + return SUCCESS; + } + void PmuList::AggregateData(const std::vector& evData, std::vector& newEvData) { // Acccumulate stat data in previous PmuCollect for convenient use. 
@@ -1025,9 +1053,6 @@ namespace KUNPENG_PMU { int PmuList::InitSymbolRecordModule(const unsigned pd, PmuTaskAttr* taskParam) { SymbolMode symMode = GetSymbolMode(pd); - if (symMode == NO_SYMBOL_RESOLVE) { - return SUCCESS; - } if (taskParam->pmuEvt->collectType == COUNTING) { return SUCCESS; @@ -1053,7 +1078,7 @@ namespace KUNPENG_PMU { } } - if (this->symModeList[pd] == RESOLVE_ELF_DWARF) { + if (this->symModeList[pd] == RESOLVE_ELF_DWARF || this->symModeList[pd] == NO_SYMBOL_RESOLVE) { for (const auto& pid: pidList) { int rt = SymResolverRecordModule(pid); if (rt != SUCCESS) { diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index b44ff55..523e2ad 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -76,6 +76,7 @@ public: void StoreSplitData(unsigned pd, std::pair& previousEventList, std::unordered_map& eventSplitMap); bool IsAllPidExit(const unsigned pd); + int ResolvePmuDataSymbol(struct PmuData* iPmuData); private: using ProcPtr = std::shared_ptr; diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index 6cfd684..788e1eb 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -216,22 +216,20 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; - if (symMode != NO_SYMBOL_RESOLVE) { - // Copy ips from ring buffer and get stack info later. - if (evt->callStack == 0) { - int i = 0; - while (i < sample->nr && !IsValidIp(sample->ips[i])) { - i++; - } - if (i < sample->nr) { + // Copy ips from ring buffer and get stack info later. 
+ if (evt->callStack == 0) { + int i = 0; + while (i < sample->nr && !IsValidIp(sample->ips[i])) { + i++; + } + if (i < sample->nr) { + ips->ips.push_back(sample->ips[i]); + } + } else { + for (int i = sample->nr - 1; i >= 0; --i) { + if (IsValidIp(sample->ips[i])) { ips->ips.push_back(sample->ips[i]); } - } else { - for (int i = sample->nr - 1; i >= 0; --i) { - if (IsValidIp(sample->ips[i])) { - ips->ips.push_back(sample->ips[i]); - } - } } } current->cpu = sample->cpu; diff --git a/pmu/spe.cpp b/pmu/spe.cpp index 0d1c7d9..2bc42c4 100644 --- a/pmu/spe.cpp +++ b/pmu/spe.cpp @@ -359,9 +359,9 @@ void Spe::CoreDummyData(struct SpeCoreContext *context, struct ContextSwitchData uint64_t off = dataTail % mpage->data_size; struct perf_event_header *header = (struct perf_event_header *)(ringBuf + off); - if (header->type == PERF_RECORD_MMAP && symbolMode != NO_SYMBOL_RESOLVE) { + if (header->type == PERF_RECORD_MMAP) { struct PerfRecordMmap *sample = (struct PerfRecordMmap *)header; - if (symbolMode == RESOLVE_ELF_DWARF) { + if (symbolMode == RESOLVE_ELF_DWARF || symbolMode == NO_SYMBOL_RESOLVE) { int ret = SymResolverUpdateModule(sample->tid, sample->filename, sample->addr); if (ret != SUCCESS) { // if the module fails to be updated, a warning is recorded to overwrite the failure error code. 
diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 4ba96b2..20776de 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -108,6 +108,7 @@ class Error: LIBPERF_ERR_INTERFACE_NOT_SUPPORT_X86 =1065 LIBPERF_ERR_NOT_SUPPORT_METRIC = 1066 LIBPERF_ERR_INVALID_CPU_FREQ_PERIOD = 1067 + LIBPERF_ERR_PMU_DATA_NO_FOUND = 1068 UNKNOWN_ERROR = 9999 diff --git a/symbol/symbol_resolve.cpp b/symbol/symbol_resolve.cpp index 46f6ad5..2f971f4 100644 --- a/symbol/symbol_resolve.cpp +++ b/symbol/symbol_resolve.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "name_resolve.h" #include "pcerr.h" #include "symbol_resolve.h" @@ -70,16 +71,15 @@ namespace { flag = false; } - static inline bool CheckIfFile(std::string mapline) + static inline bool CheckIfFile(const std::string& mapline) { - return (!((mapline.find(HUGEPAGE) != std::string::npos) || (mapline.find(DEV_ZERO) != std::string::npos) || - (mapline.find(ANON) != std::string::npos) || (mapline.find(STACK) != std::string::npos) || - (mapline.find(SOCKET) != std::string::npos) || (mapline.find(VSYSCALL) != std::string::npos) || - (mapline.find(HEAP) != std::string::npos) || (mapline.find(VDSO) != std::string::npos) || - (mapline.find(SYSV) != std::string::npos) || (mapline.find(VVAR) != std::string::npos)) && - (mapline.find(R_XP) != std::string::npos)) - ? 
true - : false; + const std::vector patterns = {HUGEPAGE, DEV_ZERO, ANON, STACK, SOCKET, VSYSCALL, HEAP ,VDSO, SYSV, VVAR}; + for (const auto& pattern :patterns) { + if (mapline.find(pattern) != std::string::npos) { + return false; + } + } + return mapline.find(R_XP) != std::string::npos; } static inline char* InitChar(int len) @@ -398,7 +398,7 @@ bool MyElf::IsExecFile() void MyElf::Emplace(unsigned long addr, const ELF_SYM& elfSym) { - this->symTab.insert({addr, elfSym}); + this->symTab.emplace(addr, elfSym); } ELF_SYM* MyElf::FindSymbol(unsigned long addr) @@ -548,15 +548,11 @@ int SymbolResolve::RecordModule(int pid, RecordModuleType recordModuleType) moduleSafeHandler.releaseLock(pid); return 0; } - char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -588,15 +584,11 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) return SUCCESS; } // Get memory maps of pid. 
- char mapFile[MAP_LEN]; - if (snprintf(mapFile, MAP_LEN, "/proc/%d/maps", pid) < 0) { - moduleSafeHandler.releaseLock(pid); - return LIBSYM_ERR_SNPRINF_OPERATE_FAILED; - } + std::string mapFile = "/proc/" + std::to_string(pid) + "/maps"; std::ifstream file(mapFile); if (!file.is_open()) { pcerr::New(LIBSYM_ERR_OPEN_FILE_FAILED, - "libsym can't open file named " + std::string{mapFile} + " because of " + std::string{strerror(errno)}); + "libsym can't open file named " + mapFile + " because of " + std::string{strerror(errno)}); moduleSafeHandler.releaseLock(pid); return LIBSYM_ERR_OPEN_FILE_FAILED; } @@ -618,8 +610,8 @@ int SymbolResolve::UpdateModule(int pid, RecordModuleType recordModuleType) this->RecordDwarf(item->moduleName.c_str()); } } - for (auto mod : diffModVec) { - oldModVec.push_back(mod); + for (auto& mod : diffModVec) { + oldModVec.emplace_back(mod); } pcerr::New(SUCCESS); moduleSafeHandler.releaseLock(pid); @@ -1176,11 +1168,13 @@ std::vector> SymbolResolve::FindDiffMaps( const std::vector>& newMaps) const { std::vector> diffMaps; + std::set oldStarts; + for (const auto& oldMod : oldMaps) { + oldStarts.insert(oldMod->start); + } for (auto newMod : newMaps) { - for (auto oldMod : oldMaps) { - if (newMod->start != oldMod->start) { - diffMaps.push_back(newMod); - } + if (oldStarts.find(newMod->start) == oldStarts.end()) { + diffMaps.emplace_back(newMod); } } -- Gitee From 3cdc93f392f221b131ce70fade29cca76c77e47e Mon Sep 17 00:00:00 2001 From: glx Date: Wed, 28 May 2025 11:18:29 +0800 Subject: [PATCH 27/35] Fix compile error --- util/common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/util/common.h b/util/common.h index 1c3bcb7..77b8a9b 100644 --- a/util/common.h +++ b/util/common.h @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef __x86_64__ #define IS_X86 1 -- Gitee From dab5283c0548f978b824178f3dab20e99007cb3e Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Thu, 29 May 2025 10:35:58 +0800 Subject: [PATCH 28/35] 
=?UTF-8?q?libkperf=20=E6=94=AF=E6=8C=81=E7=BC=96?= =?UTF-8?q?=E8=AF=91=E9=9D=99=E6=80=81=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/pmu.h | 2 +- pmu/CMakeLists.txt | 3 +++ symbol/CMakeLists.txt | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/pmu.h b/include/pmu.h index e57c7cf..d3cd74a 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -354,7 +354,7 @@ int PmuRead(int pd, struct PmuData** pmuData); /** * @brief -* When symbol mode is SNO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function +* When symbol mode is NO_SYMBOL_RESOLVE, you can use this resolve PmuData Symbol after PmuRead function * @param pmuData the data from PmuRead * @return 0 indicates resolve success, otherwise return error code */ diff --git a/pmu/CMakeLists.txt b/pmu/CMakeLists.txt index c68bfe0..4af6e76 100644 --- a/pmu/CMakeLists.txt +++ b/pmu/CMakeLists.txt @@ -31,8 +31,11 @@ include_directories(${SYMBOL_FILE_DIR}) include_directories(${PMU_DECODER_DIR}) ADD_LIBRARY(kperf SHARED ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +ADD_LIBRARY(kperf_static STATIC ${PMU_SRC} ${UTIL_SRC} ${PFM_SRC} ${PMU_DECODER_SRC}) +set_target_properties(kperf_static PROPERTIES OUTPUT_NAME "kperf") target_link_libraries(kperf numa sym) target_compile_options(kperf PRIVATE -fPIC) install(TARGETS kperf DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS kperf_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) file(GLOB HEADER_FILES ${PROJECT_TOP_DIR}/include/*.h) install(FILES ${HEADER_FILES} DESTINATION ${CMAKE_INSTALL_PREFIX}/include) diff --git a/symbol/CMakeLists.txt b/symbol/CMakeLists.txt index 920d59e..aaa8988 100644 --- a/symbol/CMakeLists.txt +++ b/symbol/CMakeLists.txt @@ -16,6 +16,9 @@ include_directories(${INCLUDE_DIR}) message(${THIRD_PARTY}/elfin-parser/elf) ADD_LIBRARY(sym SHARED ${SYMBOL_SRC}) +ADD_LIBRARY(sym_static STATIC ${SYMBOL_SRC}) 
+set_target_properties(sym_static PROPERTIES OUTPUT_NAME "sym") target_link_libraries(sym elf_static dwarf_static pthread) install(TARGETS sym DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) +install(TARGETS sym_static DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) install(FILES ${SYMBOL_FILE_DIR}/symbol.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include) -- Gitee From 4e471a2c3f8308362866e2c2659f2a1ce8d53db0 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Thu, 29 May 2025 15:23:29 +0800 Subject: [PATCH 29/35] =?UTF-8?q?=E9=80=82=E9=85=8D=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E4=BF=A1=E6=81=AF,=E5=BD=93=E6=97=A0=E6=9D=83=E9=99=90?= =?UTF-8?q?=E6=97=B6=E6=8A=9B=E5=87=BA=E5=AF=B9=E5=BA=94=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu.cpp | 33 +++++++++++++++++++++++++++++++++ pmu/pmu_event_list.cpp | 27 +++++---------------------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index b8a7a87..cba1713 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -830,6 +830,24 @@ static void PrepareCpuList(PmuAttr *attr, PmuTaskAttr *taskParam, PmuEvt* pmuEvt } } +static bool PerfEventSupported(__u64 type, __u64 config) +{ + perf_event_attr attr{}; + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = type; + attr.config = config; + attr.disabled = 1; + attr.inherit = 1; + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; + int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); + if (fd < 0) { + return false; + } + close(fd); + return true; +} + static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *attr, const char* evtName, const int group_id) { unique_ptr taskParam(CreateNode(), PmuTaskAttrFree); @@ -851,6 +869,9 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att } else { pmuEvt = 
GetPmuEvent(evtName, collectType); if (pmuEvt == nullptr) { + if (Perrorno() != SUCCESS) { + return nullptr; + } #ifdef IS_X86 New(LIBPERF_ERR_INVALID_EVENT, "Invalid event: " + string(evtName) + ";x86 just supports core event and raw event"); #else @@ -858,6 +879,18 @@ static struct PmuTaskAttr* AssignTaskParam(PmuTaskType collectType, PmuAttr *att #endif return nullptr; } + + if (!PerfEventSupported(pmuEvt->type, pmuEvt->config)) { + int err = MapErrno(errno); + if (err == LIBPERF_ERR_NO_PERMISSION) { + New(LIBPERF_ERR_NO_PERMISSION, "Current user does not have the permission to collect the event.Swtich to the root user and run the 'echo -1 > /proc/sys/kernel/perf_event_paranoid'"); + } else if(err == UNKNOWN_ERROR) { + New(UNKNOWN_ERROR, std::string{strerror(errno)}); + } else { + New(err); + } + return nullptr; + } } /** * Assign cpus to collect diff --git a/pmu/pmu_event_list.cpp b/pmu/pmu_event_list.cpp index ab83ffd..547ccb2 100644 --- a/pmu/pmu_event_list.cpp +++ b/pmu/pmu_event_list.cpp @@ -101,24 +101,6 @@ static void GetTraceSubFolder(const std::string& traceFolder, const string& devN closedir(dir); } -static bool PerfEventSupported(__u64 type, __u64 config) -{ - perf_event_attr attr{}; - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(struct perf_event_attr); - attr.type = type; - attr.config = config; - attr.disabled = 1; - attr.inherit = 1; - attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID; - int fd = KUNPENG_PMU::PerfEventOpen(&attr, -1, 0, -1, 0); - if (fd < 0) { - return false; - } - close(fd); - return true; -} - const char** QueryCoreEvent(unsigned *numEvt) { if (!coreEventList.empty()) { @@ -128,9 +110,6 @@ const char** QueryCoreEvent(unsigned *numEvt) auto coreEventMap = KUNPENG_PMU::CORE_EVENT_MAP.at(GetCpuType()); for (auto& pair : coreEventMap) { auto eventName = pair.first; - if (!PerfEventSupported(pair.second.type, pair.second.config)) { - continue; - } char* eventNameCopy = new 
char[eventName.length() + 1]; strcpy(eventNameCopy, eventName.c_str()); coreEventList.emplace_back(eventNameCopy); @@ -203,6 +182,11 @@ const char** QueryTraceEvent(unsigned *numEvt) struct dirent *entry; const string &traceFolder = GetTraceEventDir(); if (traceFolder.empty()) { + if (errno == EACCES) { + New(LIBPERF_ERR_NO_PERMISSION, "no permission to access '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } else { + New(LIBPERF_ERR_INVALID_EVENT, "can't find '/sys/kernel/tracing/events/' or '/sys/kernel/debug/tracing/events/'"); + } return traceEventList.data(); } DIR *dir = opendir(traceFolder.c_str()); @@ -282,7 +266,6 @@ const char** PmuEventList(enum PmuEventType eventType, unsigned *numEvt) New(LIBPERF_ERR_QUERY_EVENT_LIST_FAILED, "Query event failed."); return nullptr; } - New(SUCCESS); return eventList; } -- Gitee From 97aa47bb18863a5677a9f45c58ae8bb3ad8e7226 Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 30 May 2025 10:47:23 +0800 Subject: [PATCH 30/35] =?UTF-8?q?go=E9=80=82=E9=85=8DResolvePmuDataSymbol?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go/src/libkperf/kperf/kperf.go | 25 +++++++++++++++++++ go/src/libkperf_test/libkperf_test.go | 36 +++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 99fb52d..b21f304 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -728,6 +728,31 @@ func PmuDumpData(dataVo PmuDataVo, filePath string, dumpDwf bool) error { } return nil } + +// When symbol mode is NO_SYMBOL_RESOLVE, you can use this function to resolve PmuData symbols after PmuRead +// param dataVo the data returned by PmuRead +// returns nil on success, otherwise an error +func ResolvePmuDataSymbol(dataVo PmuDataVo) error { + err := C.ResolvePmuDataSymbol(dataVo.cData) + if int(err) != 0 { +
return errors.New(C.GoString(C.Perror())) + } + dataLen := len(dataVo.GoData) + ptr := unsafe.Pointer(dataVo.cData) + slice := reflect.SliceHeader { + Data: uintptr(ptr), + Len: dataLen, + Cap: dataLen, + } + cPmuDatas := *(*[]C.struct_PmuData)(unsafe.Pointer(&slice)) + for i := 0; i < dataLen; i++ { + dataObj := cPmuDatas[i] + if dataObj.stack != nil { + dataVo.GoData[i].appendSymbols(dataObj) + } + } + return nil +} // Initialize the trace collection target // On success, a trace collect task id is returned which is the unique identity for the task diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index 28848fe..d06683a 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -287,3 +287,39 @@ func TestPmuGetCpuFreqDetail(t *testing.T) { kperf.PmuCloseCpuFreqSampling() } + +func TestResolvePmuDataSymbol(t *testing.T) { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + t.Fatalf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + t.Fatalf("kperf pmuread failed, expect err is nil, but is %v", err) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + t.Fatalf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + t.Fatalf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + t.Fatalf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} \ No newline at end of file -- Gitee From 2bee826470cfc61846c7d4aa2b7fb6cb4b1a1cff Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 30 May 
2025 10:16:28 +0800 Subject: [PATCH 31/35] =?UTF-8?q?=E6=8F=90=E4=BE=9BResolvePmuDataSymbol?= =?UTF-8?q?=E7=9A=84python=E6=8E=A5=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/modules/_libkperf/Pmu.py | 10 ++++++++++ python/modules/kperf/pmu.py | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index e99eb94..ff11186 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1572,6 +1572,15 @@ def PmuRead(pd: int) -> PmuData: c_data_len = c_PmuRead(c_pd, ctypes.byref(c_data_pointer)) return PmuData(c_data_pointer, c_data_len) +def ResolvePmuDataSymbol(pmuData: ctypes.POINTER(CtypesPmuData)) -> int: + """ + int ResolvePmuDataSymbol(struct PmuData* pmuData); + """ + c_ResolvePmuDataSymbol = kperf_so.ResolvePmuDataSymbol + c_ResolvePmuDataSymbol.argtypes = [ctypes.POINTER(CtypesPmuData)] + c_ResolvePmuDataSymbol.restype = ctypes.c_int + + return c_ResolvePmuDataSymbol(pmuData) def PmuAppendData(fromData: ctypes.POINTER(CtypesPmuData), toData: ctypes.POINTER(ctypes.POINTER(CtypesPmuData))) -> int: @@ -2039,4 +2048,5 @@ __all__ = [ 'PmuReadCpuFreqDetail', 'PmuCloseCpuFreqSampling', 'PmuCpuFreqDetail', + 'ResolvePmuDataSymbol' ] diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 90aeaf9..7453021 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -383,6 +383,14 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) +def resolvePmuDataSymbol(pmuData: PmuData) -> PmuData: + """ + when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols + :param: pmuData + :return: 0 on success, otherwise an error code + """ + return _libkperf.ResolvePmuDataSymbol(pmuData.pointer()) + def stop(pd: int) -> None: """ @@ -588,4 +596,5 @@ __all__ = [ 'open_cpu_freq_sampling', 'close_cpu_freq_sampling', 'read_cpu_freq_detail', +
'resolvePmuDataSymbol' ] -- Gitee From 591f3fb149eab3a350be24334f9774da02e6a857 Mon Sep 17 00:00:00 2001 From: twwang <920347125@qq.com> Date: Fri, 30 May 2025 17:34:05 +0800 Subject: [PATCH 32/35] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E5=80=BC=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/modules/kperf/pmu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 7453021..6bbb474 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -383,7 +383,7 @@ def read(pd: int) -> PmuData: """ return _libkperf.PmuRead(pd) -def resolvePmuDataSymbol(pmuData: PmuData) -> PmuData: +def resolvePmuDataSymbol(pmuData: PmuData) -> int: """ when kperf symbol mode is NO_SYMBOL_RESOLVE during PmuRead(), this function can be used to resolve stack symbols :param: pmuData -- Gitee From 25785e3ee24ea1708eb1b33f5dadba1f073564dc Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Fri, 30 May 2025 17:18:32 +0800 Subject: [PATCH 33/35] =?UTF-8?q?=E4=B8=8D=E7=9B=B4=E6=8E=A5=E6=9A=B4?= =?UTF-8?q?=E9=9C=B2symbol=E6=8E=A5=E5=8F=A3=E4=BA=A7=E7=94=9F=E7=9A=84?= =?UTF-8?q?=E5=BC=82=E5=B8=B8,=20=E8=80=8C=E4=BD=9C=E4=B8=BAwarning?= =?UTF-8?q?=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pmu/pmu_list.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 2e6c4c3..d0a47c4 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -646,6 +646,12 @@ namespace KUNPENG_PMU { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); } } + //Exceptions generated by the symbol interface are not directly exposed and are processed as warnings. 
+ int err = Perrorno(); + if (err < LIBPERF_ERR_NO_AVAIL_PD && err >= LIBSYM_ERR_BASE) { + pcerr::SetWarn(err, Perror()); + New(SUCCESS); + } } int PmuList::ResolvePmuDataSymbol(struct PmuData* iPmuData) -- Gitee From d697d2cf98dc2fc2c09b5408df2551cdb1a6289e Mon Sep 17 00:00:00 2001 From: Galaxy Date: Wed, 4 Jun 2025 08:43:45 +0000 Subject: [PATCH 34/35] =?UTF-8?q?=E9=80=82=E9=85=8Done=20numa=20per=20sock?= =?UTF-8?q?et=E7=9A=84ddrc=E9=87=87=E9=9B=86=E5=8A=9F=E8=83=BD=20=E9=B2=B2?= =?UTF-8?q?=E9=B9=8F=E6=9E=B6=E6=9E=84=E4=B8=8B=EF=BC=8C=E5=AF=B9=E4=BA=8E?= =?UTF-8?q?one=20numa=20per=20socket=E7=9A=84=E5=9C=BA=E6=99=AF=EF=BC=8Cdd?= =?UTF-8?q?rc=20pmu=E8=AE=BE=E5=A4=87=E5=92=8Csocket=E7=9A=84=E6=98=A0?= =?UTF-8?q?=E5=B0=84=E5=85=B3=E7=B3=BB=E4=B8=8D=E5=8F=98=EF=BC=8C=E4=BD=86?= =?UTF-8?q?=E6=98=AF=E5=92=8Cnuma=E7=9A=84=E6=98=A0=E5=B0=84=E5=85=B3?= =?UTF-8?q?=E7=B3=BB=E5=8F=91=E7=94=9F=E4=BA=86=E5=8F=98=E5=8C=96=EF=BC=9A?= =?UTF-8?q?=20hisi=5Fsccl3=5FddrcX=20->=20socket=200=20numa=200=20hisi=5Fs?= =?UTF-8?q?ccl1=5FddrcX=20->=20socket=200=20numa=200=20hisi=5Fsccl11=5Fddr?= =?UTF-8?q?cX=20->=20socket=201=20numa=201=20hisi=5Fsccl9=5FddrcX=20->=20s?= =?UTF-8?q?ocket=201=20numa=201=20=E6=89=80=E4=BB=A5=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E9=80=9A=E8=BF=87ddrcNumaId=E6=9D=A5=E5=88=A4=E6=96=ADpmu?= =?UTF-8?q?=E8=AE=BE=E5=A4=87=E5=92=8Cchannel=20id=E7=9A=84=E6=98=A0?= =?UTF-8?q?=E5=B0=84=E5=85=B3=E7=B3=BB=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 目前借助sccl id和ddrc id共同来决定channel id的映射: sccl 3 ddrc 0 -> socket 0 channel 0 sccl 3 ddrc 2 -> socket 0 channel 1 sccl 3 ddrc 3 -> socket 0 channel 2 sccl 3 ddrc 5 -> socket 0 channel 3 sccl 1 ddrc 0 -> socket 0 channel 4 sccl 1 ddrc 2 -> socket 0 channel 5 sccl 1 ddrc 3 -> socket 0 channel 6 sccl 1 ddrc 5 -> socket 0 channel 7 ... 
Signed-off-by: Galaxy --- pmu/pmu_metric.cpp | 60 ++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index d196c2f..66c70c4 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -38,6 +38,7 @@ using namespace std; using namespace pcerr; +using IdxMap = unordered_map>; static unsigned maxCpuNum = 0; static vector coreArray; @@ -1071,10 +1072,33 @@ namespace KUNPENG_PMU { return SUCCESS; } - static unordered_map> DDRC_CHANNEL_MAP = { - {CHIP_TYPE::HIPA, {0, 1, 2, 3}}, - {CHIP_TYPE::HIPB, {0, 2, 3, 5}} + static IdxMap DDRC_CHANNEL_MAP_HIPA = { + {1, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {3, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, + {5, {{0, 0}, {1, 1}, {2, 2}, {3, 3}}}, + {7, {{0, 4}, {1, 5}, {2, 6}, {3, 7}}}, }; + static IdxMap DDRC_CHANNEL_MAP_HIPB = { + {3, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {1, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + {11, {{0, 0}, {2, 1}, {3, 2}, {5, 3}}}, + {9, {{0, 4}, {2, 5}, {3, 6}, {5, 7}}}, + }; + + static unordered_map DDRC_CHANNEL_MAP = { + {HIPA, DDRC_CHANNEL_MAP_HIPA}, + {HIPB, DDRC_CHANNEL_MAP_HIPB}, + }; + + static int ParseDDRIdx(const string &devName, const string prefix) + { + size_t ddrcPos = devName.find(prefix); + size_t channelIndex = ddrcPos + prefix.length(); + string ddrcIndexStr = devName.substr(channelIndex); + size_t separatorPos = ddrcIndexStr.find("_"); + int ddrcIndex = separatorPos != string::npos ? stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + return ddrcIndex; + } static bool getChannelId(const char *evt, const unsigned ddrNumaId, unsigned &channelId) { @@ -1084,28 +1108,22 @@ namespace KUNPENG_PMU { return false; } // ddrc channel index. eg: hisi_sccl3_ddrc3_1 --> 3_1 - string ddrcStr = "ddrc"; - size_t ddrcPos = devName.find(ddrcStr); - size_t channelIndex = ddrcPos + ddrcStr.length(); - string ddrcIndexStr = devName.substr(channelIndex); - // find index in DDRC_CHANNEL_MAP. 
eg: 3_1 --> 3, corresponds to channel 2 in HIPB - size_t separatorPos = ddrcIndexStr.find("_"); - int ddrcIndex = separatorPos != string::npos ? stoi(ddrcIndexStr.substr(0, separatorPos)) : stoi(ddrcIndexStr); + int ddrcIndex = ParseDDRIdx(devName, "ddrc"); + int scclIndex = ParseDDRIdx(devName, "sccl"); - unsigned channelAddNum = 0; - if((ddrNumaId & 1) == 1) { // channel id + 4 in sequence - channelAddNum = 4; - } CHIP_TYPE chipType = GetCpuType(); //get channel index if (DDRC_CHANNEL_MAP.find(chipType) == DDRC_CHANNEL_MAP.end()) { return false; } - auto ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; - auto it = find(ddrcChannelList.begin(), ddrcChannelList.end(), ddrcIndex); - if (it != ddrcChannelList.end()) { - size_t index = distance(ddrcChannelList.begin(), it); - channelId = index + channelAddNum; - return true; + + auto &ddrcChannelList = DDRC_CHANNEL_MAP[chipType]; + auto ddrIdxMap = ddrcChannelList.find(scclIndex); + if (ddrIdxMap != ddrcChannelList.end()) { + auto channelIdx = ddrIdxMap->second.find(ddrcIndex); + if (channelIdx != ddrIdxMap->second.end()) { + channelId = channelIdx->second; + return true; + } } return false; } @@ -1136,7 +1154,7 @@ namespace KUNPENG_PMU { outData.mode = GetMetricMode(data.metric); outData.channelId = channelId; outData.ddrNumaId = data.ddrNumaId; - outData.socketId = data.ddrNumaId < 2 ? 
0 : 1; // numa id 0-1 --> socket id 0; numa id 2-3 --> socket id 1 + outData.socketId = data.socketId; devDataByChannel[ddrDatakey] = outData; } else { findData->second.count += data.count; -- Gitee From ac75a68c978da53a3bf1701bc0beb3574ba5911f Mon Sep 17 00:00:00 2001 From: echodo <2220386943@qq.com> Date: Tue, 3 Jun 2025 18:12:02 +0800 Subject: [PATCH 35/35] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=96=B0=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3=E7=9A=84=E6=96=87=E6=A1=A3=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Go_API.md | 79 +++++++++++++++++++++++++++++++++++++++++++++- docs/Python_API.md | 51 ++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/docs/Go_API.md b/docs/Go_API.md index 7199142..fd0a346 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -438,7 +438,8 @@ func main() { ``` -### kperf.PmuGetCpuFreq +### kperf.PmuGetCpuFreq + func PmuGetCpuFreq(core uint) (int64, error) 查询当前系统指定core的实时CPU频率 * core cpu coreId @@ -457,4 +458,80 @@ func main() { } fmt.Printf("coreId %v freq is %v\n", coreId, freq) } +``` + +### kperf.PmuOpenCpuFreqSampling + +func PmuOpenCpuFreqSampling(period uint) (error) 开启cpu频率采集 + +### kperf.PmuCloseCpuFreqSampling + +func PmuCloseCpuFreqSampling() 关闭cpu频率采集 + +### kperf.PmuReadCpuFreqDetail + +func PmuReadCpuFreqDetail() ([]PmuCpuFreqDetail) 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + err := kperf.PmuOpenCpuFreqSampling(100) + if err != nil { + fmt.Printf("kperf PmuOpenCpuFreqSampling failed, expect err is nil, but is %v", err) + } + + freqList := kperf.PmuReadCpuFreqDetail() + for _, v := range freqList { + fmt.Printf("cpuId=%v, minFreq=%d, maxFreq=%d, avgFreq=%d", v.CpuId, v.MinFreq, v.MaxFreq, v.AvgFreq) + } + + kperf.PmuCloseCpuFreqSampling() +} +``` + +### kperf.ResolvePmuDataSymbol + +func ResolvePmuDataSymbol(dataVo PmuDataVo) error 
当SymbolMode不设置或者设置为0时,可通过该接口解析PmuRead返回的PmuData数据中的符号 +```go +import "libkperf/kperf" +import "fmt" + +func main() { + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, CallStack:true, SampleRate: 1000, UseFreq:true} + fd, err := kperf.PmuOpen(kperf.SAMPLE, attr) + if err != nil { + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v", err) + return + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v", err) + return + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) != 0 { + fmt.Printf("expect symbol data is empty, but is not") + } + } + + parseErr := kperf.ResolvePmuDataSymbol(dataVo) + if parseErr != nil { + fmt.Printf("kperf ResolvePmuDataSymbol failed, expect err is nil, but is %v", parseErr) + } + + for _, o := range dataVo.GoData { + if len(o.Symbols) == 0 { + fmt.Printf("expect symbol data is not empty, but is empty") + } + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} ``` \ No newline at end of file diff --git a/docs/Python_API.md b/docs/Python_API.md index de323e9..a0a1968 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -428,4 +428,55 @@ kperf.get_numa_core(numaId: int): 查询指定numaId下对应的core列表 # python代码示例 numaId = 1 numa_cores = kperf.get_numa_core(numaId) +``` + +### kperf.open_cpu_freq_sampling + +def open_cpu_freq_sampling(period: int) 开启cpu频率采集 + +### kperf.close_cpu_freq_sampling + +def close_cpu_freq_sampling() 关闭cpu频率采集 + +### kperf.read_cpu_freq_detail + +def read_cpu_freq_detail() -> CpuFreqDetail 读取开启频率采集到读取时间内的cpu最大频率、最小频率以及平均频率 +```python +#python代码示例 +err = kperf.open_cpu_freq_sampling(100) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +dataList = kperf.read_cpu_freq_detail() +for item in dataList.iter: + print(f"cpuId={item.cpuId} minFreq={item.minFreq} maxFreq={item.maxFreq} avgFreq={item.avgFreq}") 
+ +kperf.close_cpu_freq_sampling() +``` + +### kperf.resolvePmuDataSymbol + +def resolvePmuDataSymbol(pmuData: PmuData) -> int: 当SymbolMode不设置或者设置为0时,可通过该接口解析read返回的PmuData数据中的符号 +```python +#python代码示例 +event_name = "cycles" +pmu_attr = kperf.PmuAttr( + evtList=[event_name], + sampleRate=1000, + callStack=True, + useFreq=True, +) +fd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) +if fd == -1: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.enable(fd) +time.sleep(1) +kperf.disable(fd) +pmu_data = kperf.read(fd) +err = kperf.resolvePmuDataSymbol(pmu_data) +if err != 0: + print(f"error number: {kperf.errorno()} error message: {kperf.error()}") + exit(1) +kperf.close(fd) ``` \ No newline at end of file -- Gitee