From 0ce089a645a54f77d8284c7982d113c0c7c5f653 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Mon, 21 Jul 2025 19:26:07 +0800 Subject: [PATCH] add PCIE latency uncore event && revise unit of PCIE bandwidth --- docs/C_C++_API.md | 17 ++- docs/Details_Usage.md | 119 +++++++++++++++++-- docs/Go_API.md | 15 ++- docs/Python_API.md | 15 ++- go/src/libkperf/kperf/kperf.go | 32 +++++- include/pcerrc.h | 7 +- include/pmu.h | 19 +++- pmu/pmu_metric.cpp | 196 ++++++++++++++++++++++++-------- python/modules/_libkperf/Pmu.py | 47 +++++++- python/modules/kperf/perror.py | 7 +- python/modules/kperf/pmu.py | 30 +++-- python/tests/test_metric.py | 20 ++++ test/test_perf/test_metric.cpp | 34 ++++++ util/common.cpp | 4 +- 14 files changed, 463 insertions(+), 99 deletions(-) diff --git a/docs/C_C++_API.md b/docs/C_C++_API.md index a47397f..5dfe421 100644 --- a/docs/C_C++_API.md +++ b/docs/C_C++_API.md @@ -239,12 +239,18 @@ * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles - * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns - * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns - * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns - * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns + * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/us + * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/us + * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/us + * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的写带宽,单位:Bytes/us + * PMU_PCIE_RX_MRD_LAT 采集pcie设备的rx方向上的读延时,单位:ns + * PMU_PCIE_RX_MWR_LAT 采集pcie设备的rx方向上的写延时,单位:ns + * PMU_PCIE_TX_MRD_LAT 采集pcie设备的tx方向上的读延时,单位:ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count - * char *bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 + * char *bdf: 指定需要采集设备的bdf号,只对pcie带宽和smmu指标有效 + * char *port: 指定需要采集设备的port号,只对pcie延时指标有效 * 与PmuOpen类似,返回task Id 返回值 > 0 初始化成功 返回值 = -1 初始化失败,可通过Perror()查看错误信息 @@ -267,6 
+273,7 @@ * unsigned numaId: 数据的numa编号 * unsigned clusterId: 簇ID * unsigned bdf: 数据的bdf编号 + * unsigned port: 数据的port编号 * struct: ddr相关的统计数据 * unsigned channelId: ddr数据的channel编号 * unsigned ddrNumaId: ddr数据的numa编号 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 5eba4b3..850a76c 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -689,9 +689,9 @@ for (int i=0;i +#include "symbol.h" +#include "pmu.h" + +PmuDeviceAttr devAttr[1]; +// 采集PCIE设备RX的读延时,PMU_PCIE_RX_MWR_LAT为RX的写延时,PMU_PCIE_TX_MRD_LAT为TX的读延时 +devAttr[0].metric = PMU_PCIE_RX_MRD_LAT; +// 设置PCIE的port号 +devAttr[0].port = "c0:00.0"; +// 初始化采集任务 +int pd = PmuDeviceOpen(devAttr, 1); +// 开始采集 +PmuEnable(pd); +sleep(1); +PmuData *oriData = nullptr; +int oriLen = PmuRead(pd, &oriData); +PmuDeviceData *devData = nullptr; +auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 1, &devData); +// devData的长度等于pcie设备的个数 +for (int i=0;i 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 ```go @@ -393,6 +397,7 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat * NumaId uint32 数据的numa编号 * ClusterId uint32 簇ID * Bdf string 数据的bdf编号 + * Port string 数据的port编号 * DdrDataStructure ddr相关的统计数据 ### kperf.DevDataFree diff --git a/docs/Python_API.md b/docs/Python_API.md index 3f6e21f..e0f28ef 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -345,14 +345,18 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles - * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns - * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns - * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns - * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns + * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/us + * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/us + * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/us + * PMU_PCIE_TX_MWR_BW 
采集pcie设备的tx方向上的写带宽,单位:Bytes/us + * PMU_PCIE_RX_MRD_LAT 采集pcie设备的rx方向上的读延时,单位:ns + * PMU_PCIE_RX_MWR_LAT 采集pcie设备的rx方向上的写延时,单位:ns + * PMU_PCIE_TX_MRD_LAT 采集pcie设备的tx方向上的读延时,单位:ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 - * bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 + * bdf: 指定需要采集设备的bdf号,只对pcie带宽和smmu指标有效 + * port: 指定需要采集设备的port号,只对pcie延时指标有效 * 返回值是int类型,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 ```python @@ -391,6 +395,7 @@ kperf.get_device_metric(pmu_data: PmuData, device_attr: List[PmuDeviceAttr]) 对 * numaId: 数据的numa编号 * cluster: 簇ID * bdf: 数据的bdf编号 + * port: 数据的port编号 * DdrDataStructure: ddr相关的统计数据 diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 2d46d6d..b6c45de 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -38,6 +38,7 @@ struct MetricDataExt { unsigned coreId; unsigned clusterId; char* bdf; + char* port; unsigned channelId; unsigned ddrNumaId; unsigned socketId; @@ -122,6 +123,7 @@ void IPmuGetMetricDataExt(struct PmuDeviceData* deviceData, struct MetricDataExt break; case PMU_METRIC_BDF: metricData->bdf = deviceData->bdf; + metricData->port = deviceData->port; break; case PMU_METRIC_CLUSTER: metricData->clusterId = deviceData->clusterId; @@ -273,14 +275,23 @@ var ( // Collect pcie rx bandwidth. // Perpcie metric. // Collect pcie rx bandwidth for pcie device. - // Unit: Bytes/s + // Unit: Bytes/us PMU_PCIE_RX_MRD_BW C.enum_PmuDeviceMetric = C.PMU_PCIE_RX_MRD_BW PMU_PCIE_RX_MWR_BW C.enum_PmuDeviceMetric = C.PMU_PCIE_RX_MWR_BW // Perpcie metric. // Collect pcie tx bandwidth for pcie device. - // Unit: Bytes/s + // Unit: Bytes/us PMU_PCIE_TX_MRD_BW C.enum_PmuDeviceMetric = C.PMU_PCIE_TX_MRD_BW PMU_PCIE_TX_MWR_BW C.enum_PmuDeviceMetric = C.PMU_PCIE_TX_MWR_BW + // Perpcie metric. + // Collect pcie rx latency for pcie device. 
+ // Unit: ns + PMU_PCIE_RX_MRD_LAT C.enum_PmuDeviceMetric = C.PMU_PCIE_RX_MRD_LAT + PMU_PCIE_RX_MWR_LAT C.enum_PmuDeviceMetric = C.PMU_PCIE_RX_MWR_LAT + // Perpcie metric. + // Collect pcie tx latency for pcie device. + // Unit: ns + PMU_PCIE_TX_MRD_LAT C.enum_PmuDeviceMetric = C.PMU_PCIE_TX_MRD_LAT // Perpcie metric. // Collect smmu address transaction. // Unit: count @@ -412,9 +423,12 @@ type PmuTraceDataVo struct { type PmuDeviceAttr struct { Metric C.enum_PmuDeviceMetric - // Used for PMU_PCIE_XXX and PMU_SMMU_XXX to collect a specific pcie device. + // Used for PMU_PCIE_XXX_BW and PMU_SMMU_XXX to collect a specific pcie device. // The string of bdf is something like '7a:01.0'. Bdf string + // Used for PMU_PCIE_XXX_LAT to collect latency data. + // Only one port supported. + Port string } type DdrDataStructure struct { @@ -433,6 +447,7 @@ type PmuDeviceData struct { NumaId uint32 // for pernuma metric ClusterId uint32 // for percluster metric Bdf string // for perpcie metric + Port string // for perpcie metric DdrDataStructure // for perchannel metric } @@ -1027,6 +1042,11 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) { } else { cAttr[i].bdf = nil } + if len(v.Port) > 0 { + cAttr[i].port = C.CString(v.Port) + } else { + cAttr[i].port = nil + } } deviceTaskId := C.PmuDeviceOpen(&cAttr[0], C.uint(len(attr))) if int(deviceTaskId) == -1 { @@ -1052,6 +1072,11 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat } else { cAttr[i].bdf = nil } + if len(v.Port) > 0 { + cAttr[i].port = C.CString(v.Port) + } else { + cAttr[i].port = nil + } } metricLen := C.int(0) metricData := C.IPmuGetMetric(dataVo.cData, C.uint(len(dataVo.GoData)), &cAttr[0], C.uint(len(deviceAttr)), &metricLen) @@ -1078,6 +1103,7 @@ func PmuGetDevMetric(dataVo PmuDataVo, deviceAttr []PmuDeviceAttr) (PmuDeviceDat goDeviceList[i].NumaId = uint32(metricDataExt.numaId) goDeviceList[i].ClusterId = uint32(metricDataExt.clusterId) goDeviceList[i].Bdf = 
C.GoString(metricDataExt.bdf) + goDeviceList[i].Port = C.GoString(metricDataExt.port) goDeviceList[i].ChannelId = uint32(metricDataExt.channelId) goDeviceList[i].DdrNumaId = uint32(metricDataExt.ddrNumaId) goDeviceList[i].SocketId = uint32(metricDataExt.socketId) diff --git a/include/pcerrc.h b/include/pcerrc.h index 3b03821..fbd3522 100644 --- a/include/pcerrc.h +++ b/include/pcerrc.h @@ -98,11 +98,11 @@ extern "C" { #define LIBPERF_ERR_INVALID_PMU_DEVICES_BDF 1051 #define LIBPERF_ERR_OPEN_INVALID_FILE 1052 #define LIBPERF_ERR_INVALID_BDF_VALUE 1053 -#define LIBPERF_ERR_NOT_SOUUPUT_PCIE_BDF 1054 -#define LIBPERF_ERR_NOT_SOUUPUT_SMMU_BDF 1055 +#define LIBPERF_ERR_NOT_SUPPORT_PCIE_BDF 1054 +#define LIBPERF_ERR_NOT_SUPPORT_SMMU_BDF 1055 #define LIBPERF_ERR_INVALID_IOSMMU_DIR 1056 #define LIBPERF_ERR_INVALID_SMMU_NAME 1057 -#define LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING 1058 +#define LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING 1058 #define LIBPERF_ERR_OPEN_PCI_FILE_FAILD 1059 #define LIBPERF_ERR_INVALID_MTRIC_PARAM 1060 #define LIBPERF_ERR_PMU_DEVICES_NULL 1061 @@ -116,6 +116,7 @@ extern "C" { #define LIBPERF_ERR_INVALID_CGROUP_LIST 1069 #define LIBPERF_ERR_NOT_SUPPORT_PMU_FILE 1070 #define LIBPERF_ERR_INVALID_PMU_FILE 1071 +#define LIBPERF_ERR_NOT_SUPPORT_PCIE_PORT 1072 #define UNKNOWN_ERROR 9999 diff --git a/include/pmu.h b/include/pmu.h index dcc1833..1cbff17 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -455,15 +455,24 @@ enum PmuDeviceMetric { // Collect pcie rx bandwidth. // Perpcie metric. // Collect pcie rx bandwidth for pcie device. - // Unit: Bytes/ns + // Unit: Bytes/us PMU_PCIE_RX_MRD_BW, PMU_PCIE_RX_MWR_BW, // Perpcie metric. // Collect pcie tx bandwidth for pcie device. - // Unit: Bytes/ns + // Unit: Bytes/us PMU_PCIE_TX_MRD_BW, PMU_PCIE_TX_MWR_BW, // Perpcie metric. + // Collect pcie rx latency for pcie device. + // Unit: ns + PMU_PCIE_RX_MRD_LAT, + PMU_PCIE_RX_MWR_LAT, + // Perpcie metric. + // Collect pcie tx latency for pcie device. 
+ // Unit: ns + PMU_PCIE_TX_MRD_LAT, + // Perpcie metric. // Collect smmu address transaction. // Unit: count PMU_SMMU_TRAN, @@ -478,9 +487,12 @@ enum PmuDeviceMetric { struct PmuDeviceAttr { enum PmuDeviceMetric metric; - // Used for PMU_PCIE_XXX and PMU_SMMU_XXX to collect a specifi pcie device. + // Used for PMU_PCIE_XXX_BW and PMU_SMMU_XXX to collect a specific pcie device. // The string of bdf is something like '7a:01.0'. char *bdf; + // Used for PMU_PCIE_XXX_LAT to collect latency data. + // Only one port supported. + char *port; }; enum PmuBdfType { @@ -533,6 +545,7 @@ struct PmuDeviceData { unsigned clusterId; // for perpcie metric char *bdf; + char *port; // for perchannel metric of ddr struct { unsigned channelId; diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 64c9908..7541325 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -84,6 +84,9 @@ namespace KUNPENG_PMU { {PmuDeviceMetric::PMU_PCIE_RX_MWR_BW, "PMU_PCIE_RX_MWR_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MRD_BW, "PMU_PCIE_TX_MRD_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MWR_BW, "PMU_PCIE_TX_MWR_BW"}, + {PmuDeviceMetric::PMU_PCIE_RX_MRD_LAT, "PMU_PCIE_RX_MRD_LAT"}, + {PmuDeviceMetric::PMU_PCIE_RX_MWR_LAT, "PMU_PCIE_RX_MWR_LAT"}, + {PmuDeviceMetric::PMU_PCIE_TX_MRD_LAT, "PMU_PCIE_TX_MRD_LAT"}, {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"}, {PmuDeviceMetric::PMU_HHA_CROSS_NUMA, "PMU_HHA_CROSS_NUMA"}, {PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, "PMU_HHA_CROSS_SOCKET"}, @@ -97,6 +100,9 @@ namespace KUNPENG_PMU { PMU_PCIE_RX_MWR_BW, PMU_PCIE_TX_MRD_BW, PMU_PCIE_TX_MWR_BW, + PMU_PCIE_RX_MRD_LAT, + PMU_PCIE_RX_MWR_LAT, + PMU_PCIE_TX_MRD_LAT, PMU_SMMU_TRAN}; static bool IsBdfMetric(PmuDeviceMetric metric) @@ -260,6 +266,42 @@ namespace KUNPENG_PMU { } }; + PMU_METRIC_PAIR PCIE_RX_MRD_LAT = { + PmuDeviceMetric::PMU_PCIE_RX_MRD_LAT, + { + "hisi_pcie", + "core", + {"0x0210", "0x10210"}, + "", + "port=", + 1 + } + }; + + PMU_METRIC_PAIR PCIE_RX_MWR_LAT = { + PmuDeviceMetric::PMU_PCIE_RX_MWR_LAT, + { + 
"hisi_pcie", + "core", + {"0x0010", "0x10010"}, + "", + "port=", + 1 + } + }; + + PMU_METRIC_PAIR PCIE_TX_MRD_LAT = { + PmuDeviceMetric::PMU_PCIE_TX_MRD_LAT, + { + "hisi_pcie", + "core", + {"0x0011", "0x10011"}, + "", + "port=", + 1 + } + }; + PMU_METRIC_PAIR SMMU_TRAN = { PmuDeviceMetric::PMU_SMMU_TRAN, { @@ -319,6 +361,9 @@ namespace KUNPENG_PMU { METRIC_CONFIG::PCIE_RX_MWR_BW, METRIC_CONFIG::PCIE_TX_MRD_BW, METRIC_CONFIG::PCIE_TX_MWR_BW, + METRIC_CONFIG::PCIE_RX_MRD_LAT, + METRIC_CONFIG::PCIE_RX_MWR_LAT, + METRIC_CONFIG::PCIE_TX_MRD_LAT, METRIC_CONFIG::SMMU_TRAN, METRIC_CONFIG::HHA_CROSS_NUMA, METRIC_CONFIG::HHA_CROSS_SOCKET, @@ -512,8 +557,8 @@ namespace KUNPENG_PMU { } auto classifiedDevices = ClassifyDevicesByPrefix(pcieConfig.devicePrefix, pcieConfig.subDeviceName, pcieConfig.splitPosition); if (classifiedDevices.empty()) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING, "No pcie pmu device is not exist in the " + SYS_DEVICES); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING; + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING, "No pcie pmu device is not exist in the " + SYS_DEVICES); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING; } for (const auto& device : classifiedDevices) { const auto& pcieNuma = device.first; @@ -521,8 +566,8 @@ namespace KUNPENG_PMU { for (auto& pciePmu : pciePmus) { string bdfBusPath = SYS_DEVICES + "/" + pciePmu + "/bus"; if (!ExistPath(bdfBusPath)) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING, "pcie pmu bus file is empty"); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING; + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING, "pcie pmu bus file is empty"); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING; } string bdfMinPath = SYS_DEVICES + "/" + pciePmu + "/bdf_min"; string bdfMaxPath = SYS_DEVICES + "/" + pciePmu + "/bdf_max"; @@ -532,8 +577,8 @@ namespace KUNPENG_PMU { try { bus = stoul(bdfBusStr, nullptr, 16); } catch (const std::exception& e) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING, "pcie pmu bus file is invalid"); - return 
LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING; + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING, "pcie pmu bus file is invalid"); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING; } if (bus == 0) { continue; @@ -547,8 +592,8 @@ namespace KUNPENG_PMU { string bdfMinStr = ReadFileContent(bdfMinPath); string bdfMaxStr = ReadFileContent(bdfMaxPath); if (bdfMinStr.empty() || bdfMaxStr.empty()) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING, "pcie pmu bdfMin or bdfMax file is empty"); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING; + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING, "pcie pmu bdfMin or bdfMax file is empty"); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING; } int bdfMin = 0; int bdfMax = 0; @@ -556,8 +601,8 @@ namespace KUNPENG_PMU { bdfMin = stoul(bdfMinStr, nullptr, 16); bdfMax = stoul(bdfMaxStr, nullptr, 16); } catch (const std::exception& e) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING, "pcie pmu bdfMin or bdfMax file is invalid"); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING; + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING, "pcie pmu bdfMin or bdfMax file is invalid"); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING; } pciePmuBdfRang[pciePmu] = std::make_tuple(bdfBusStr.substr(strlen("0x")), bdfMin, bdfMax); } @@ -708,8 +753,8 @@ namespace KUNPENG_PMU { { auto it = bdfToSmmuPmuMap.find(bdf); if (it == bdfToSmmuPmuMap.end()) { - New(LIBPERF_ERR_NOT_SOUUPUT_SMMU_BDF, "BDF Value " + bdf + " not found in any SMMU Directory."); - return LIBPERF_ERR_NOT_SOUUPUT_SMMU_BDF; + New(LIBPERF_ERR_NOT_SUPPORT_SMMU_BDF, "BDF Value " + bdf + " not found in any SMMU Directory."); + return LIBPERF_ERR_NOT_SUPPORT_SMMU_BDF; } smmuPmuName = it->second; New(SUCCESS); @@ -720,8 +765,8 @@ namespace KUNPENG_PMU { { auto bdfIt = bdfToPcieMap.find(bdf); if (bdfIt == bdfToPcieMap.end()) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_BDF, "bdf value " + bdf + " is not managed by any PCIe Device."); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_BDF; + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_BDF, "bdf value " + bdf + " is not 
managed by any PCIe Device."); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_BDF; } pciePmuName = bdfIt->second; New(SUCCESS); @@ -733,15 +778,24 @@ namespace KUNPENG_PMU { { vector eventList; if (IsBdfMetric(deviceAttr.metric)) { - string bdf = deviceAttr.bdf; + string bdf; + string port; for (const auto& evt : metricConfig.events) { string device = ""; if (metricConfig.bdfParameter == "bdf=") { + bdf = deviceAttr.bdf; int err = FindPcieDeviceByBdf(bdf, device); if (err != SUCCESS) { return {}; } - } else { + } else if (metricConfig.bdfParameter == "port=") { + port = deviceAttr.port; + int err = FindPcieDeviceByBdf(port, device); + if (err != SUCCESS) { + return {}; + } + } else if (metricConfig.bdfParameter == "filter_stream_id=") { + bdf = deviceAttr.bdf; int err = FindSmmuDeviceByBdf(bdf, device); if (err != SUCCESS) { return {}; @@ -751,12 +805,16 @@ namespace KUNPENG_PMU { if (!metricConfig.extraConfig.empty()) { eventString += "," + metricConfig.extraConfig; } - if (!metricConfig.bdfParameter.empty() && !bdf.empty()) { - stringstream bdfValue; + if (!metricConfig.bdfParameter.empty()) { + stringstream paramValue; uint16_t userBdf = 0; - ConvertBdfStringToValue(bdf, userBdf); - bdfValue << "0x" << hex << userBdf; - eventString += "," + metricConfig.bdfParameter + bdfValue.str(); + if (!bdf.empty()) { + ConvertBdfStringToValue(bdf, userBdf); + } else { + ConvertBdfStringToValue(port, userBdf); + } + paramValue << "0x" << hex << userBdf; + eventString += "," + metricConfig.bdfParameter + paramValue.str(); } eventString += "/"; eventList.push_back(eventString); @@ -840,20 +898,37 @@ namespace KUNPENG_PMU { static int CheckBdf(struct PmuDeviceAttr& deviceAttr) { - if (IsBdfMetric(deviceAttr.metric) && deviceAttr.bdf == nullptr) { - New(LIBPERF_ERR_INVALID_PMU_DEVICES_BDF, "When collecting pcie or smmu metric, bdf value can not is nullptr!"); - return LIBPERF_ERR_INVALID_PMU_DEVICES_BDF; + if (deviceAttr.metric >= PmuDeviceMetric::PMU_PCIE_RX_MRD_BW && deviceAttr.metric 
<= PmuDeviceMetric::PMU_PCIE_TX_MWR_BW) { + if (deviceAttr.bdf == nullptr) { + New(LIBPERF_ERR_INVALID_PMU_DEVICES_BDF, "When collecting pcie bandwidth, bdf value can not be nullptr!"); + return LIBPERF_ERR_INVALID_PMU_DEVICES_BDF; + } + if (!CheckPcieBdf(deviceAttr.bdf)) { + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_BDF, "this bdf not support pcie metric counting." + " Please use PmuDeviceBdfList to query."); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_BDF; + } } - if (deviceAttr.metric >= PmuDeviceMetric::PMU_PCIE_RX_MRD_BW && deviceAttr.metric <= PmuDeviceMetric::PMU_PCIE_TX_MWR_BW - && !CheckPcieBdf(deviceAttr.bdf)) { - New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_BDF, "this bdf not support pcie metric counting." - " Please use PmuDeviceBdfList to query."); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_BDF; + if (deviceAttr.metric >= PmuDeviceMetric::PMU_PCIE_RX_MRD_LAT && deviceAttr.metric <= PmuDeviceMetric::PMU_PCIE_TX_MRD_LAT) { + if (deviceAttr.port == nullptr) { + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_PORT, "When collect pcie latency, the port can not be nullptr!"); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_PORT; + } + if (!CheckPcieBdf(deviceAttr.port)) { + New(LIBPERF_ERR_NOT_SUPPORT_PCIE_PORT, "the port not support pcie metric counting. "); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_PORT; + } } - if (deviceAttr.metric == PmuDeviceMetric::PMU_SMMU_TRAN && !CheckSmmuBdf(deviceAttr.bdf)) { - New(LIBPERF_ERR_NOT_SOUUPUT_SMMU_BDF, "this bdf not support smmu metric counting." - " Please use PmuDeviceBdfList to query."); - return LIBPERF_ERR_NOT_SOUUPUT_SMMU_BDF; + if (deviceAttr.metric == PmuDeviceMetric::PMU_SMMU_TRAN) { + if (deviceAttr.bdf == nullptr) { + New(LIBPERF_ERR_INVALID_PMU_DEVICES_BDF, "When collecting smmu metric, bdf value can not be nullptr!"); + return LIBPERF_ERR_INVALID_PMU_DEVICES_BDF; + } + if (!CheckSmmuBdf(deviceAttr.bdf)) { + New(LIBPERF_ERR_NOT_SUPPORT_SMMU_BDF, "this bdf not support smmu metric counting." 
+ " Please use PmuDeviceBdfList to query."); + return LIBPERF_ERR_NOT_SUPPORT_SMMU_BDF; + } } New(SUCCESS); return SUCCESS; @@ -890,7 +965,11 @@ namespace KUNPENG_PMU { for (int i = 0; i < len; ++i) { std::string key = ""; if (IsBdfMetric(attr[i].metric)) { - key = std::to_string(attr[i].metric) + "_" + attr[i].bdf; + if (attr[i].bdf != nullptr) { + key = std::to_string(attr[i].metric) + "_" + attr[i].bdf; + } else { + key = std::to_string(attr[i].metric) + "_" + attr[i].port; + } } else { key = std::to_string(attr[i].metric); } @@ -912,6 +991,7 @@ namespace KUNPENG_PMU { unsigned numaId; unsigned clusterId; char *bdf; + char *port; struct { unsigned channelId; unsigned ddrNumaId; @@ -987,6 +1067,9 @@ namespace KUNPENG_PMU { case PMU_PCIE_RX_MWR_BW: case PMU_PCIE_TX_MRD_BW: case PMU_PCIE_TX_MWR_BW: + case PMU_PCIE_RX_MWR_LAT: + case PMU_PCIE_RX_MRD_LAT: + case PMU_PCIE_TX_MRD_LAT: case PMU_SMMU_TRAN: return PMU_METRIC_BDF; case PMU_HHA_CROSS_NUMA: @@ -1241,7 +1324,7 @@ namespace KUNPENG_PMU { return SUCCESS; } - int PcieBWAggregate(const PmuDeviceMetric metric, const vector &rawData, vector &devData) + int PcieAggregate(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { const auto& deviceConfig = GetDeviceMtricConfig(); const auto& findConfig = deviceConfig.find(metric); @@ -1276,16 +1359,21 @@ namespace KUNPENG_PMU { if (findLenData == data.second.end() || findLatData == data.second.end()) { continue; } - // Compute bandwidth: (packet length)/(latency) - double bw = 0.0; + + double value = 0.0; if (findLatData->second.count != 0) { - bw = (double)(4 * findLenData->second.count) / findLatData->second.count; + // Compute average latency: (total latency) / (total sum), unit: ns + value = (double)(findLenData->second.count) / findLatData->second.count; + if (metric >= PmuDeviceMetric::PMU_PCIE_RX_MRD_BW && metric <= PmuDeviceMetric::PMU_PCIE_TX_MWR_BW) { + // Compute bandwidth: (packet length) * 4 / (latency), unit: Bytes/us + value *= 4; + } } 
else { - bw = -1; + value = -1; } PmuDeviceData outData; outData.metric = metric; - outData.count = bw; + outData.count = value; outData.mode = GetMetricMode(metric); outData.bdf = findLenData->second.bdf; devData.push_back(outData); @@ -1323,14 +1411,18 @@ namespace KUNPENG_PMU { {PMU_DDR_WRITE_BW, DDRBw}, {PMU_L3_TRAFFIC, L3Bw}, {PMU_L3_LAT, L3Lat}}; + map aggregateMap = { {PMU_DDR_READ_BW, AggregateByChannel}, {PMU_DDR_WRITE_BW, AggregateByChannel}, {PMU_L3_LAT, AggregateByCluster}, - {PMU_PCIE_RX_MRD_BW, PcieBWAggregate}, - {PMU_PCIE_RX_MWR_BW, PcieBWAggregate}, - {PMU_PCIE_TX_MRD_BW, PcieBWAggregate}, - {PMU_PCIE_TX_MWR_BW, PcieBWAggregate}, + {PMU_PCIE_RX_MRD_BW, PcieAggregate}, + {PMU_PCIE_RX_MWR_BW, PcieAggregate}, + {PMU_PCIE_TX_MRD_BW, PcieAggregate}, + {PMU_PCIE_TX_MWR_BW, PcieAggregate}, + {PMU_PCIE_RX_MRD_LAT, PcieAggregate}, + {PMU_PCIE_RX_MWR_LAT, PcieAggregate}, + {PMU_PCIE_TX_MRD_LAT, PcieAggregate}, {PMU_SMMU_TRAN, SmmuTransAggregate}, {PMU_HHA_CROSS_NUMA, AggregateByNuma}, {PMU_HHA_CROSS_SOCKET, AggregateByNuma}, @@ -1361,6 +1453,9 @@ namespace KUNPENG_PMU { // For pcie events, check if event is related with specifi bdf. 
if (IsBdfMetric(devAttr.metric)) { auto bdfStr = ExtractEvtStr("bdf", evtName); + if (bdfStr.empty()) { + bdfStr = ExtractEvtStr("port", evtName); + } if (bdfStr.empty()) { bdfStr = ExtractEvtStr("filter_stream_id", evtName); } @@ -1368,7 +1463,12 @@ namespace KUNPENG_PMU { return false; } uint16_t expectBdf; - int ret = ConvertBdfStringToValue(devAttr.bdf, expectBdf); + int ret; + if (devAttr.bdf != nullptr) { + ret = ConvertBdfStringToValue(devAttr.bdf, expectBdf); + } else { + ret = ConvertBdfStringToValue(devAttr.port, expectBdf); + } if (ret != SUCCESS) { return false; } @@ -1441,7 +1541,11 @@ namespace KUNPENG_PMU { devData.socketId = pmuData[i].cpuTopo->socketId; } if (IsBdfMetric(devAttr.metric)) { - devData.bdf = devAttr.bdf; + if (devAttr.bdf != nullptr) { + devData.bdf = devAttr.bdf; + } else { + devData.port = devAttr.port; + } } devDataList.emplace_back(devData); } diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index 572e461..e1059a8 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -439,16 +439,19 @@ class CtypesPmuDeviceAttr(ctypes.Structure): struct PmuDeviceAttr { enum PmuDeviceMetric metric; char *bdf; + char *port; }; """ _fields_ = [ ('metric', ctypes.c_int), - ('bdf', ctypes.c_char_p) + ('bdf', ctypes.c_char_p), + ('port', ctypes.c_char_p) ] def __init__(self, metric=0, - bdf= None, + bdf=None, + port=None, *args, **kw): super(CtypesPmuDeviceAttr, self).__init__(*args, **kw) @@ -457,17 +460,22 @@ class CtypesPmuDeviceAttr(ctypes.Structure): self.bdf = ctypes.c_char_p(bdf.encode(UTF_8)) else: self.bdf = None - + if port: + self.port = ctypes.c_char_p(port.encode(UTF_8)) + else: + self.port = None class PmuDeviceAttr(object): __slots__ = ['__c_pmu_device_attr'] def __init__(self, metric=0, - bdf= None): + bdf=None, + port=None): self.__c_pmu_device_attr = CtypesPmuDeviceAttr( metric=metric, - bdf=bdf + bdf=bdf, + port=port ) @property @@ -495,6 +503,19 @@ class 
PmuDeviceAttr(object): else: self.c_pmu_device_attr.bdf = None + @property + def port(self): + if self.c_pmu_device_attr.port: + return self.c_pmu_device_attr.port.decode(UTF_8) + return None + + @port.setter + def port(self, port): + if port: + self.c_pmu_device_attr.port = ctypes.c_char_p(port.encode(UTF_8)) + else: + self.c_pmu_device_attr.port = None + @classmethod def from_c_pmu_device_attr(cls, c_pmu_device_attr): pmu_device_attr = cls() @@ -519,6 +540,7 @@ class CtypesPmuDeviceData(ctypes.Structure): unsigned numaId; unsigned clusterId; char *bdf; + char *port; struct { unsigned channelId; unsigned ddrNumaId; @@ -533,6 +555,7 @@ class CtypesPmuDeviceData(ctypes.Structure): ('numaId', ctypes.c_uint), ('clusterId', ctypes.c_uint), ('bdf', ctypes.c_char_p), + ('port', ctypes.c_char_p), ('_structure', DdrDataStructure) ] @@ -567,6 +590,12 @@ class CtypesPmuDeviceData(ctypes.Structure): return self._union.bdf.decode(UTF_8) return "" + @property + def port(self): + if self.mode == 4 and self._union.port: # PMU_METRIC_BDF + return self._union.port.decode(UTF_8) + return "" + @property def channelId(self): if self.mode == 5 and self._union._structure.channelId: # PMU_METRIC_CHANNEL @@ -636,7 +665,13 @@ class ImplPmuDeviceData: if self.mode == 4 and self.c_pmu_device_data._union.bdf: # PMU_METRIC_BDF return self.c_pmu_device_data._union.bdf.decode(UTF_8) return "" - + + @property + def port(self): + if self.mode == 4 and self.c_pmu_device_data._union.port: # PMU_METRIC_BDF + return self.c_pmu_device_data._union.port.decode(UTF_8) + return "" + @property def channelId(self): if self.mode == 5 and self.c_pmu_device_data._union._structure.channelId: # PMU_METRIC_CHANNEL diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index a6edaa8..e0d210c 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -97,11 +97,11 @@ class Error: LIBPERF_ERR_INVALID_PMU_DEVICES_BDF = 1051 LIBPERF_ERR_OPEN_INVALID_FILE = 1052 
LIBPERF_ERR_INVALID_BDF_VALUE = 1053 - LIBPERF_ERR_NOT_SOUUPUT_PCIE_BDF = 1054 - LIBPERF_ERR_NOT_SOUUPUT_SMMU_BDF = 1055 + LIBPERF_ERR_NOT_SUPPORT_PCIE_BDF = 1054 + LIBPERF_ERR_NOT_SUPPORT_SMMU_BDF = 1055 LIBPERF_ERR_INVALID_IOSMMU_DIR = 1056 LIBPERF_ERR_INVALID_SMMU_NAME = 1057 - LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING = 1058 + LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING = 1058 LIBPERF_ERR_OPEN_PCI_FILE_FAILD = 1059 LIBPERF_ERR_INVALID_MTRIC_PARAM = 1060 LIBPERF_ERR_PMU_DEVICES_NULL = 1061 @@ -115,6 +115,7 @@ class Error: LIBPERF_ERR_INVALID_CGROUP_LIST = 1069 LIBPERF_ERR_NOT_SUPPORT_PMU_FILE = 1070 LIBPERF_ERR_INVALID_PMU_FILE = 1071 + LIBPERF_ERR_NOT_SUPPORT_PCIE_PORT = 1072 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 668f5d1..8840038 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -134,39 +134,52 @@ class PmuDeviceMetric: # Collect pcie rx bandwidth. # Perpcie metric. # Collect pcie rx bandwidth for pcie device. - # Unit: Bytes/s + # Unit: Bytes/us PMU_PCIE_RX_MRD_BW = 6 PMU_PCIE_RX_MWR_BW = 7 # Perpcie metric. # Collect pcie tx bandwidth for pcie device. - # Unit: Bytes/s + # Unit: Bytes/us PMU_PCIE_TX_MRD_BW = 8 PMU_PCIE_TX_MWR_BW = 9 # Perpcie metric. + # Collect pcie rx latency for pcie device. + # Unit: ns + PMU_PCIE_RX_MRD_LAT = 10 + PMU_PCIE_RX_MWR_LAT = 11 + # Perpcie metric. + # Collect pcie tx latency for pcie device. + # Unit: ns + PMU_PCIE_TX_MRD_LAT = 12 + # Perpcie metric. # Collect smmu address transaction. # Unit: count - PMU_SMMU_TRAN = 10 + PMU_SMMU_TRAN = 13 # Pernuma metric. # Collect rate of cross-numa operations received by HHA. - PMU_HHA_CROSS_NUMA = 11 + PMU_HHA_CROSS_NUMA = 14 # Pernuma metric. # Collect rate of cross-socket operations received by HHA. 
- PMU_HHA_CROSS_SOCKET = 12 + PMU_HHA_CROSS_SOCKET = 15 class PmuDeviceAttr(_libkperf.PmuDeviceAttr): """ struct PmuDeviceAttr { enum PmuDeviceMetric metric; - // Used for PMU_PCIE_XXX and PMU_SMMU_XXX to collect a specifi pcie device. + // Used for PMU_PCIE_XXX_BW and PMU_SMMU_XXX to collect a specific pcie device. // The string of bdf is something like '7a:01.0'. char *bdf; + // Used for PMU_PCIE_XXX_LAT to collect latency data. + // Only one port supported. + char *port; }; """ - def __init__(self, metric, bdf=None): + def __init__(self, metric, bdf=None, port=None): super(PmuDeviceAttr, self).__init__( metric=metric, - bdf=bdf + bdf=bdf, + port=port ) class PmuBdfType: @@ -200,6 +213,7 @@ class PmuDeviceData(_libkperf.PmuDeviceData): unsigned numaId; // for perpcie metric char *bdf; + char *port; // for perchannel metric of ddr struct { unsigned channelId; diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index 90c254b..e0aba09 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -278,6 +278,26 @@ def test_collect_hha_cross(): print_dev_data_details(dev_data) kperf.close(pd) +def test_get_metric_pcie_latency(): + bdf_list_iter = kperf.device_bdf_list(kperf.PmuBdfType.PMU_BDF_TYPE_PCIE) + dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_PCIE_RX_MRD_LAT, port=port) + for port in bdf_list_iter + ] + pd = kperf.device_open(dev_attr) + print(kperf.error()) + assert pd != -1, f"Expected non-negative pd, but got {pd}" + kperf.enable(pd) + time.sleep(1) + kperf.disable(pd) + ori_data = kperf.read(pd) + assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" + + dev_data = kperf.get_device_metric(ori_data, dev_attr) + assert len(dev_data) == len(dev_attr) + print_dev_data_details(dev_data) + kperf.close(pd) + if __name__ == '__main__': # 提示用户使用pytest 运行测试文件 print("This is a pytest script. 
Run it using the 'pytest' command.") diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index d6b5e10..ff2dfa9 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -320,4 +320,38 @@ TEST_F(TestMetric, GetMetricHHACross) DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); +} + +TEST_F(TestMetric, GetMetricPcieLatency) +{ + const char** bdfList = nullptr; + unsigned bdfLen = 0; + bdfList = PmuDeviceBdfList(PMU_BDF_TYPE_PCIE, &bdfLen); + PmuDeviceAttr devAttr[bdfLen] = {}; + for (int i = 0; i < bdfLen; ++i) { + devAttr[i].metric = PMU_PCIE_RX_MRD_LAT; + devAttr[i].port = strdup(bdfList[i]); + } + + int pd = PmuDeviceOpen(devAttr, bdfLen); + ASSERT_NE(pd, -1); + PmuEnable(pd); + sleep(1); + PmuDisable(pd); + PmuData* oriData = nullptr; + int oriLen = PmuRead(pd, &oriData); + ASSERT_NE(oriLen, -1); + + PmuDeviceData *devData = nullptr; + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, bdfLen, &devData); + ASSERT_EQ(len, bdfLen); + ASSERT_EQ(devData[0].metric, PMU_PCIE_RX_MRD_LAT); + ASSERT_EQ(devData[0].mode, PMU_METRIC_BDF); + ASSERT_TRUE(strcmp(devData[0].port, bdfList[0]) == 0); + for (int i = 0; i < bdfLen; ++i) { + free(devAttr[i].port); + } + DevDataFree(devData); + PmuDataFree(oriData); + PmuClose(pd); } \ No newline at end of file diff --git a/util/common.cpp b/util/common.cpp index 389e258..a4dde7c 100644 --- a/util/common.cpp +++ b/util/common.cpp @@ -90,8 +90,8 @@ int ConvertHexStrToInt(const std::string& hexStr, uint64_t& bus) try { bus = stoul(hexStr, nullptr, 16); } catch (const std::exception& e) { - pcerr::New(LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING, "hexStr: " + hexStr + " is invalid"); - return LIBPERF_ERR_NOT_SOUUPUT_PCIE_COUNTING; + pcerr::New(LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING, "hexStr: " + hexStr + " is invalid"); + return LIBPERF_ERR_NOT_SUPPORT_PCIE_COUNTING; } return SUCCESS; } -- Gitee