From 52fbc1a670dc520aa36b216465ea0bf79dd811c4 Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Wed, 18 Jun 2025 16:52:13 +0800 Subject: [PATCH] Add HHA uncore event --- docs/Details_Usage.md | 96 ++++++++++++++++++++++++++ docs/Go_API.md | 2 + docs/Python_API.md | 4 +- go/src/libkperf/kperf/kperf.go | 18 +++-- include/pmu.h | 16 +++-- pmu/pmu_metric.cpp | 121 +++++++++++++++++++++++++-------- python/modules/kperf/pmu.py | 18 +++-- python/tests/test_metric.py | 22 ++++++ test/test_perf/test_metric.cpp | 25 +++++++ 9 files changed, 276 insertions(+), 46 deletions(-) diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index 68c5f2c..5c33863 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -1043,6 +1043,102 @@ kperf.PmuClose(fd) pcie bw(16:04.0): 124122412 Bytes/ns ``` +### 采集跨numa/跨socket访问HHA比例 +libkperf提供了采集跨numa/跨socket访问HHA的操作比例的能力,用于分析访存型应用的性能瓶颈,采集以numa为粒度。 + +参考代码: +```c++ +// c++代码示例 +#include <iostream> +#include "symbol.h" +#include "pmu.h" + +PmuDeviceAttr devAttr[2]; +// 采集跨numa访问HHA的操作比例 +devAttr[0].metric = PMU_HHA_CROSS_NUMA; +// 采集跨socket访问HHA的操作比例 +devAttr[1].metric = PMU_HHA_CROSS_SOCKET; +// 初始化采集任务 +int pd = PmuDeviceOpen(devAttr, 2); +// 开始采集 +PmuEnable(pd); +sleep(1); +PmuData *oriData = nullptr; +int oriLen = PmuRead(pd, &oriData); +PmuDeviceData *devData = nullptr; +auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); +// devData的长度等于numa个数的2倍(每个指标各占一半) +for (int i = 0; i < len / 2; ++i) { + cout << "HHA cross-numa operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count<< "\n"; +} +for (int i = len / 2; i < len; ++i) { + cout << "HHA cross-socket operations ratio (Numa: " << devData[i].numaId << "): " << devData[i].count<< "\n"; +} +DevDataFree(devData); +PmuDataFree(oriData); +PmuDisable(pd); +``` + +```python +# python代码示例 +import kperf +import time + +dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA), + 
kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET) +] +pd = kperf.device_open(dev_attr) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +ori_data = kperf.read(pd) +dev_data = kperf.get_device_metric(ori_data, dev_attr) +for data in dev_data.iter: + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA: + print(f"HHA cross-numa operations ratio (Numa: {data.numaId}): {data.count}") + if data.metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET: + print(f"HHA cross-socket operations ratio (Numa: {data.numaId}): {data.count}") +``` + +```go +// go代码示例 +import "libkperf/kperf" +import "fmt" +import "time" + +deviceAttrs := []kperf.PmuDeviceAttr{kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_NUMA}, kperf.PmuDeviceAttr{Metric: kperf.PMU_HHA_CROSS_SOCKET}} +fd, _ := kperf.PmuDeviceOpen(deviceAttrs) +kperf.PmuEnable(fd) +time.Sleep(1 * time.Second) +kperf.PmuDisable(fd) +dataVo, _ := kperf.PmuRead(fd) +deviceDataVo, _ := kperf.PmuGetDevMetric(dataVo, deviceAttrs) +for _, v := range deviceDataVo.GoDeviceData { + if v.Metric == kperf.PMU_HHA_CROSS_NUMA { + fmt.Printf("HHA cross-numa operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } + if v.Metric == kperf.PMU_HHA_CROSS_SOCKET { + fmt.Printf("HHA cross-socket operations ratio (Numa: %v): %v\n", v.NumaId, v.Count) + } +} +kperf.DevDataFree(deviceDataVo) +kperf.PmuDataFree(dataVo) +kperf.PmuClose(fd) +``` + +执行上述代码,输出的结果类似如下: +``` +HHA cross-numa operations ratio (Numa: 0): 0.438888 +HHA cross-numa operations ratio (Numa: 1): 0.0248052 +HHA cross-numa operations ratio (Numa: 2): 0.0277224 +HHA cross-numa operations ratio (Numa: 3): 0.181404 +HHA cross-socket operations ratio (Numa: 0): 0.999437 +HHA cross-socket operations ratio (Numa: 1): 0.0253748 +HHA cross-socket operations ratio (Numa: 2): 0.329864 +HHA cross-socket operations ratio (Numa: 3): 0.18956 +``` + ### 采集系统调用函数耗时信息 libkperf基于tracepoint事件采集能力,在原有能力的基础上,重新封装了一组相关的调用API,来提供采集系统调用函数耗时信息的能力,类似于perf trace命令 diff --git 
a/docs/Go_API.md b/docs/Go_API.md index 30a5c3f..33d7a7c 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -346,6 +346,8 @@ func PmuDeviceOpen(attr []PmuDeviceAttr) (int, error) 初始化采集uncore事 * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * Bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int和error,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 diff --git a/docs/Python_API.md b/docs/Python_API.md index 2ec007a..ffebcb2 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -339,12 +339,14 @@ kperf.device_open(dev_attr: List[PmuDeviceAttr]) 初始化采集uncore事件指 * PMU_L3_TRAFFIC 采集每个core的L3的访问字节数,单位:Bytes * PMU_L3_MISS 采集每个core的L3的miss数量,单位:count * PMU_L3_REF 采集每个core的L3的总访问数量,单位:count - * PMU_L3_LAT 采集每个numa的L3的总时延,单位:cycles + * PMU_L3_LAT 采集每个cluster的L3的总时延,单位:cycles * PMU_PCIE_RX_MRD_BW 采集pcie设备的rx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_RX_MWR_BW 采集pcie设备的rx方向上的写带宽,单位:Bytes/ns * PMU_PCIE_TX_MRD_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_PCIE_TX_MWR_BW 采集pcie设备的tx方向上的读带宽,单位:Bytes/ns * PMU_SMMU_TRAN 采集指定smmu设备的地址转换次数,单位:count + * PMU_HHA_CROSS_NUMA 采集每个numa的跨numa访问HHA的操作比例 + * PMU_HHA_CROSS_SOCKET 采集每个numa的跨socket访问HHA的操作比例 * bdf: 指定需要采集设备的bdf号,只对pcie和smmu指标有效 * 返回值是int类型,pd > 0表示初始化成功,pd == -1初始化失败,可通过kperf.error()查看错误信息,以下是一个kperf.device_open的示例 diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index a27c112..e6bd507 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -246,12 +246,12 @@ var ( // PmuDeviceMetric var ( - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes/s PMU_DDR_READ_BW C.enum_PmuDeviceMetric = C.PMU_DDR_READ_BW - // Pernuma metric. 
- // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes/s PMU_DDR_WRITE_BW C.enum_PmuDeviceMetric = C.PMU_DDR_WRITE_BW // Percore metric. @@ -266,8 +266,8 @@ var ( // Collect L3 total reference count, including miss and hit count. // Unit: count PMU_L3_REF C.enum_PmuDeviceMetric = C.PMU_L3_REF - // Pernuma metric. - // Collect L3 total latency for each numa node. + // Percluster metric. + // Collect L3 total latency for each cluster node. // Unit: cycles PMU_L3_LAT C.enum_PmuDeviceMetric = C.PMU_L3_LAT // Collect pcie rx bandwidth. @@ -285,6 +285,12 @@ var ( // Collect smmu address transaction. // Unit: count PMU_SMMU_TRAN C.enum_PmuDeviceMetric = C.PMU_SMMU_TRAN + // Pernuma metric. + // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_NUMA + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET C.enum_PmuDeviceMetric = C.PMU_HHA_CROSS_SOCKET ) // PmuBdfType diff --git a/include/pmu.h b/include/pmu.h index 1063cdb..3d9d684 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -419,12 +419,12 @@ int PmuGetField(struct SampleRawData *rawData, const char *fieldName, void *valu struct SampleRawField *PmuGetFieldExp(struct SampleRawData *rawData, const char *fieldName); enum PmuDeviceMetric { - // Pernuma metric. - // Collect ddr read bandwidth for each numa node. + // Perchannel metric. + // Collect ddr read bandwidth for each channel. // Unit: Bytes PMU_DDR_READ_BW, - // Pernuma metric. - // Collect ddr write bandwidth for each numa node. + // Perchannel metric. + // Collect ddr write bandwidth for each channel. // Unit: Bytes PMU_DDR_WRITE_BW, // Percore metric. @@ -457,7 +457,13 @@ enum PmuDeviceMetric { // Perpcie metric. // Collect smmu address transaction. // Unit: count - PMU_SMMU_TRAN + PMU_SMMU_TRAN, + // Pernuma metric. 
+ // Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA, + // Pernuma metric. + // Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET }; struct PmuDeviceAttr { diff --git a/pmu/pmu_metric.cpp b/pmu/pmu_metric.cpp index 66c70c4..1fe04d7 100644 --- a/pmu/pmu_metric.cpp +++ b/pmu/pmu_metric.cpp @@ -84,11 +84,13 @@ namespace KUNPENG_PMU { {PmuDeviceMetric::PMU_PCIE_RX_MWR_BW, "PMU_PCIE_RX_MWR_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MRD_BW, "PMU_PCIE_TX_MRD_BW"}, {PmuDeviceMetric::PMU_PCIE_TX_MWR_BW, "PMU_PCIE_TX_MWR_BW"}, - {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"} + {PmuDeviceMetric::PMU_SMMU_TRAN, "PMU_SMMU_TRAN"}, + {PmuDeviceMetric::PMU_HHA_CROSS_NUMA, "PMU_HHA_CROSS_NUMA"}, + {PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, "PMU_HHA_CROSS_SOCKET"}, }; set percoreMetric = {PMU_L3_TRAFFIC, PMU_L3_MISS, PMU_L3_REF}; - set pernumaMetric = {PMU_L3_LAT}; + set pernumaMetric = {PMU_HHA_CROSS_NUMA, PMU_HHA_CROSS_SOCKET}; set perClusterMetric = {PMU_L3_LAT}; set perChannelMetric = {PMU_DDR_READ_BW, PMU_DDR_WRITE_BW}; set perpcieMetric = {PMU_PCIE_RX_MRD_BW, @@ -269,6 +271,30 @@ namespace KUNPENG_PMU { 2 } }; + + PMU_METRIC_PAIR HHA_CROSS_NUMA = { + PmuDeviceMetric::PMU_HHA_CROSS_NUMA, + { + "hisi_sccl", + "hha", + {"0x0", "0x02"}, + "", + "", + 0 + } + }; + + PMU_METRIC_PAIR HHA_CROSS_SOCKET = { + PmuDeviceMetric::PMU_HHA_CROSS_SOCKET, + { + "hisi_sccl", + "hha", + {"0x0", "0x01"}, + "", + "", + 0 + } + }; } static const map HIP_A_UNCORE_METRIC_MAP { @@ -278,6 +304,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::L3_MISS, METRIC_CONFIG::L3_REF, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; static const map HIP_B_UNCORE_METRIC_MAP { @@ -292,6 +320,8 @@ namespace KUNPENG_PMU { METRIC_CONFIG::PCIE_TX_MRD_BW, METRIC_CONFIG::PCIE_TX_MWR_BW, METRIC_CONFIG::SMMU_TRAN, + METRIC_CONFIG::HHA_CROSS_NUMA, + METRIC_CONFIG::HHA_CROSS_SOCKET, }; const UNCORE_METRIC_MAP UNCORE_METRIC_CONFIG_MAP 
= { @@ -852,7 +882,7 @@ namespace KUNPENG_PMU { } // remove duplicate device attribute - static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr, bool l3ReDup) + static int RemoveDupDeviceAttr(struct PmuDeviceAttr *attr, unsigned len, std::vector& deviceAttr) { std::unordered_set uniqueSet; for (int i = 0; i < len; ++i) { @@ -864,17 +894,6 @@ namespace KUNPENG_PMU { } if (uniqueSet.find(key) == uniqueSet.end()) { - // when in deviceopen remove the same PMU_L3_TRAFFIC and PMU_L3_REF, - // but when getDevMetric we need to keep them. - if (l3ReDup == true && - (attr[i].metric == PmuDeviceMetric::PMU_L3_TRAFFIC || attr[i].metric == PmuDeviceMetric::PMU_L3_REF)) { - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_TRAFFIC)) != uniqueSet.end()) { - continue; - } - if (uniqueSet.find(std::to_string(PmuDeviceMetric::PMU_L3_REF)) != uniqueSet.end()) { - continue; - } - } uniqueSet.insert(key); deviceAttr.emplace_back(attr[i]); } @@ -957,6 +976,9 @@ namespace KUNPENG_PMU { case PMU_PCIE_TX_MWR_BW: case PMU_SMMU_TRAN: return PMU_METRIC_BDF; + case PMU_HHA_CROSS_NUMA: + case PMU_HHA_CROSS_SOCKET: + return PMU_METRIC_NUMA; } return PMU_METRIC_INVALID; } @@ -977,25 +999,57 @@ namespace KUNPENG_PMU { int AggregateByNuma(const PmuDeviceMetric metric, const vector &rawData, vector &devData) { - map devDataByNuma; + const auto& deviceConfig = GetDeviceMtricConfig(); + const auto& findConfig = deviceConfig.find(metric); + if (findConfig == deviceConfig.end()) { + return SUCCESS; + } + auto &evts = findConfig->second.events; + if (evts.size() != 2) { + return SUCCESS; + } + // Event name for total access count. + string totalEvt = evts[0]; + // Event name for cross-numa/cross-socket count. + string crossEvt = evts[1]; + // Sort data by numa, and then sort by event string. 
+ map> devDataByNuma; for (auto &data : rawData) { + string devName; + string evtName; + if (!GetDeviceName(data.evtName, devName, evtName)) { + continue; + } + auto evtConfig = ExtractEvtStr("config", evtName); auto findData = devDataByNuma.find(data.numaId); if (findData == devDataByNuma.end()) { - PmuDeviceData outData; - outData.metric = data.metric; - outData.count = data.count; - outData.mode = GetMetricMode(data.metric); - outData.numaId = data.numaId; - devDataByNuma[data.numaId] = outData; + devDataByNuma[data.numaId][evtConfig] = data; } else { - findData->second.count += data.count; + devDataByNuma[data.numaId][evtConfig].count += data.count; } } for (auto &data : devDataByNuma) { - devData.push_back(data.second); + // Get events of cross-numa/cross-socket access count and total access count. + auto findcrossData = data.second.find(crossEvt); + auto findtotalData = data.second.find(totalEvt); + if (findcrossData == data.second.end() || findtotalData == data.second.end()) { + continue; + } + // Compute ratio: cross access count / total access count + double ratio = 0.0; + if (findtotalData->second.count != 0) { + ratio = (double)(findcrossData->second.count) / findtotalData->second.count; + } else { + ratio = -1; + } + PmuDeviceData outData; + outData.metric = metric; + outData.count = ratio; + outData.mode = GetMetricMode(metric); + outData.numaId = data.first; + devData.push_back(outData); } - return SUCCESS; } @@ -1264,6 +1318,8 @@ namespace KUNPENG_PMU { {PMU_PCIE_TX_MRD_BW, PcieBWAggregate}, {PMU_PCIE_TX_MWR_BW, PcieBWAggregate}, {PMU_SMMU_TRAN, SmmuTransAggregate}, + {PMU_HHA_CROSS_NUMA, AggregateByNuma}, + {PMU_HHA_CROSS_SOCKET, AggregateByNuma}, }; static bool IsMetricEvent(const string &devName, const string &evtName, const PmuDeviceAttr &devAttr) @@ -1366,7 +1422,7 @@ namespace KUNPENG_PMU { if (perClusterMetric.find(devAttr.metric) != perClusterMetric.end()) { devData.clusterId = pmuData[i].cpuTopo->coreId / clusterWidth; } - if 
(perChannelMetric.find(devAttr.metric) != pernumaMetric.end()) { + if (perChannelMetric.find(devAttr.metric) != perChannelMetric.end()) { devData.ddrNumaId = pmuData[i].cpuTopo->numaId; devData.socketId = pmuData[i].cpuTopo->socketId; } @@ -1454,7 +1510,7 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) } // Remove duplicate device attributes. vector deviceAttr; - if (RemoveDupDeviceAttr(attr, len, deviceAttr, true) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, len, deviceAttr) != SUCCESS) { return -1; } vector configEvtList; @@ -1466,8 +1522,17 @@ int PmuDeviceOpen(struct PmuDeviceAttr *attr, unsigned len) configEvtList.insert(configEvtList.end(), temp.begin(), temp.end()); } - vector evts; + //remove the same event of PMU_L3_TRAFFIC and PMU_L3_REF, PMU_HHA_CROSS_NUMA and PMU_HHA_CROSS_SOCKET + unordered_set tmpEvents; + vector filteredEvtList; for (auto& evt : configEvtList) { + if (tmpEvents.find(evt) == tmpEvents.end()) { + tmpEvents.insert(evt); + filteredEvtList.push_back(evt); + } + } + vector evts; + for (auto& evt : filteredEvtList) { evts.push_back(const_cast(evt.c_str())); } @@ -1519,7 +1584,7 @@ int PmuGetDevMetric(struct PmuData *pmuData, unsigned len, } // Remove duplicate device attributes. vector deviceAttr; - if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr, false) != SUCCESS) { + if (RemoveDupDeviceAttr(attr, attrLen, deviceAttr) != SUCCESS) { return -1; } // Filter pmuData by metric and generate InnerDeviceData, diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index 2f0a2b5..213125d 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -107,12 +107,12 @@ class SymbolMode: RESOLVE_ELF_DWARF = 2 # Resolve elf and dwarf. All fields in Symbol will be valid. class PmuDeviceMetric: - # Pernuma metric. - # Collect ddr read bandwidth for each numa node. + # Perchannel metric. + # Collect ddr read bandwidth for each channel. # Unit: Bytes/s PMU_DDR_READ_BW = 0 - # Pernuma metric. 
- # Collect ddr write bandwidth for each numa node. + # Perchannel metric. + # Collect ddr write bandwidth for each channel. # Unit: Bytes/s PMU_DDR_WRITE_BW = 1 # Percore metric. @@ -127,8 +127,8 @@ class PmuDeviceMetric: # Collect L3 total reference count, including miss and hit count. # Unit: count PMU_L3_REF = 4 - # Pernuma metric. - # Collect L3 total latency for each numa node. + # Percluster metric. + # Collect L3 total latency for each cluster node. # Unit: cycles PMU_L3_LAT = 5 # Collect pcie rx bandwidth. @@ -146,6 +146,12 @@ class PmuDeviceMetric: # Collect smmu address transaction. # Unit: count PMU_SMMU_TRAN = 10 + # Pernuma metric. + # Collect rate of cross-numa operations received by HHA. + PMU_HHA_CROSS_NUMA = 11 + # Pernuma metric. + # Collect rate of cross-socket operations received by HHA. + PMU_HHA_CROSS_SOCKET = 12 class PmuDeviceAttr(_libkperf.PmuDeviceAttr): """ diff --git a/python/tests/test_metric.py b/python/tests/test_metric.py index bf653ce..90c254b 100644 --- a/python/tests/test_metric.py +++ b/python/tests/test_metric.py @@ -256,6 +256,28 @@ def test_get_metric_smmu_transaction(): print_dev_data_details(dev_data) kperf.close(pd) +def test_collect_hha_cross(): + dev_attr = [ + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET), + kperf.PmuDeviceAttr(metric=kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA) + ] + pd = kperf.device_open(dev_attr) + print(kperf.error()) + assert pd != -1, f"Expected non-negative pd, but got {pd}" + kperf.enable(pd) + time.sleep(1) + kperf.disable(pd) + ori_data = kperf.read(pd) + assert len(ori_data) != -1, f"Expected non-negative ori_len, but got {len(ori_data)}" + + dev_data = kperf.get_device_metric(ori_data, dev_attr) + assert dev_data[0].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_SOCKET + assert dev_data[0].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA + assert dev_data[-1].metric == kperf.PmuDeviceMetric.PMU_HHA_CROSS_NUMA + assert dev_data[-1].mode == kperf.PmuMetricMode.PMU_METRIC_NUMA 
+ print_dev_data_details(dev_data) + kperf.close(pd) + if __name__ == '__main__': # 提示用户使用pytest 运行测试文件 print("This is a pytest script. Run it using the 'pytest' command.") diff --git a/test/test_perf/test_metric.cpp b/test/test_perf/test_metric.cpp index d10ca39..68710cb 100644 --- a/test/test_perf/test_metric.cpp +++ b/test/test_perf/test_metric.cpp @@ -309,4 +309,29 @@ TEST_F(TestMetric, GetMetricSmmuTransaction) DevDataFree(devData); PmuDataFree(oriData); PmuClose(pd); +} + +TEST_F(TestMetric, GetMetricHHACross) +{ + PmuDeviceAttr devAttr[2] = {}; + devAttr[0].metric = PMU_HHA_CROSS_NUMA; + devAttr[1].metric = PMU_HHA_CROSS_SOCKET; + int pd = PmuDeviceOpen(devAttr, 2); + ASSERT_NE(pd, -1); + PmuEnable(pd); + sleep(1); + PmuDisable(pd); + PmuData* oriData = nullptr; + int oriLen = PmuRead(pd, &oriData); + ASSERT_NE(oriLen, -1); + + PmuDeviceData *devData = nullptr; + auto len = PmuGetDevMetric(oriData, oriLen, devAttr, 2, &devData); + ASSERT_EQ(devData[0].metric, PMU_HHA_CROSS_NUMA); + ASSERT_EQ(devData[0].mode, PMU_METRIC_NUMA); + ASSERT_EQ(devData[len - 1].metric, PMU_HHA_CROSS_SOCKET); + ASSERT_EQ(devData[len - 1].mode, PMU_METRIC_NUMA); + DevDataFree(devData); + PmuDataFree(oriData); + PmuClose(pd); } \ No newline at end of file -- Gitee