From 50ddf068198dd51570bfda8256bca226e681f8c3 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 20 Jun 2023 14:46:03 +0530 Subject: [PATCH 1/5] perf/amd: Prevent grouping of IBS events ANBZ: #24134 commit 7c2128235eff99b448af8f4b5b2933495bf1a440 upstream. IBS PMUs can have only one event active at any point in time. Restrict grouping of multiple IBS events. Reported-by: Sandipan Das Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230620091603.269-1-ravi.bangoria@amd.com --- arch/x86/events/amd/ibs.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index ae44421023fa..dd34616d8990 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event) return -ENOENT; } +/* + * Grouping of IBS events is not possible since IBS can have only + * one event active at any point in time. 
+ */ +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling; + + if (event->group_leader == event) + return 0; + + if (event->group_leader->pmu == event->pmu) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu == event->pmu) + return -EINVAL; + } + return 0; +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; u64 max_cnt, config; + int ret; perf_ibs = get_ibs_pmu(event->attr.type); if (!perf_ibs) @@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event) if (config & ~perf_ibs->config_mask) return -EINVAL; + ret = validate_group(event); + if (ret) + return ret; + if (hwc->sample_period) { if (config & perf_ibs->cnt_mask) /* raw max_cnt may not be set */ -- Gitee From fe49a983ff187f97e58a25ab64f09a03411d1d8a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 1 Oct 2022 11:37:05 +0530 Subject: [PATCH 2/5] perf/mem: Rename PERF_MEM_LVLNUM_EXTN_MEM to PERF_MEM_LVLNUM_CXL ANBZ: #24134 commit cb6c18b5a41622c7a439508f7421f8766a91cb87 upstream. PERF_MEM_LVLNUM_EXTN_MEM was introduced to cover CXL devices but it's bit ambiguous name and also not generic enough to cover cxl.cache and cxl.io devices. Rename it to PERF_MEM_LVLNUM_CXL to be more specific. 
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/f6268268-b4e9-9ed6-0453-65792644d953@amd.com --- arch/x86/events/amd/ibs.c | 2 +- include/uapi/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index dd34616d8990..9484c5b9bd4d 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -836,7 +836,7 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, /* Extension Memory */ if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) { - data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL; if (op_data2->rmt_node) { data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; /* IBS doesn't provide Remote socket detail */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 5b4dcb47f961..488db78fe6b4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1270,7 +1270,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ /* 5-0x8 available */ -#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */ +#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ -- Gitee From 91925e0aa61260f605226f633fe27535db53ef39 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 25 Jul 2023 20:32:04 +0530 Subject: [PATCH 3/5] perf/mem: Introduce PERF_MEM_LVLNUM_UNC ANBZ: #24134 commit 526fffabc5fb63e80eb890c74b6570df2570c87f upstream. Older API PERF_MEM_LVL_UNC can be replaced by PERF_MEM_LVLNUM_UNC. 
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230725150206.184-2-ravi.bangoria@amd.com --- include/uapi/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 488db78fe6b4..81a9a4ae7b10 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1269,7 +1269,8 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x8 available */ +/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -- Gitee From ac497be2c341d8e508b637fb5464b707b404ad25 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Jul 2023 20:32:06 +0530 Subject: [PATCH 4/5] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src ANBZ: #24134 commit 8bfc20baa9355d2ce6b031ff8bdb62c9456923f7 upstream. Kernel IBS driver wasn't using new PERF_MEM_* APIs due to some of its limitations. Mainly: 1. mem_lvl_num doesn't allow setting multiple sources whereas old API allows it. Setting multiple data sources is useful because IBS on pre-zen4 uarch doesn't provide fine granular DataSrc details (there is only one such DataSrc(2h) though). 2. perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf c2c (c2c_decode_stats()) does not use mem_lvl_num at all. 1st one can be handled using ANY_CACHE with HOPS_0. 2nd is purely perf tool specific issue and should be fixed separately. 
Signed-off-by: Namhyung Kim Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230725150206.184-4-ravi.bangoria@amd.com --- arch/x86/events/amd/ibs.c | 156 +++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 88 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 9484c5b9bd4d..9075393ff60e 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -738,38 +738,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) return op_data2->data_src_lo; } -static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, - union ibs_op_data3 *op_data3, - struct perf_sample_data *data) +#define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT)) +#define LN(x) PERF_MEM_S(LVLNUM, x) +#define REM PERF_MEM_S(REMOTE, REMOTE) +#define HOPS(x) PERF_MEM_S(HOPS, x) + +static u64 g_data_src[8] = { + [IBS_DATA_SRC_LOC_CACHE] = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0), + [IBS_DATA_SRC_DRAM] = L(LOC_RAM) | LN(RAM), + [IBS_DATA_SRC_REM_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), + [IBS_DATA_SRC_IO] = L(IO) | LN(IO), +}; + +#define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM) +#define RMT_NODE_APPLICABLE(x) (RMT_NODE_BITS & (1 << x)) + +static u64 g_zen4_data_src[32] = { + [IBS_DATA_SRC_EXT_LOC_CACHE] = L(L3) | LN(L3), + [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0), + [IBS_DATA_SRC_EXT_DRAM] = L(LOC_RAM) | LN(RAM), + [IBS_DATA_SRC_EXT_FAR_CCX_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), + [IBS_DATA_SRC_EXT_PMEM] = LN(PMEM), + [IBS_DATA_SRC_EXT_IO] = L(IO) | LN(IO), + [IBS_DATA_SRC_EXT_EXT_MEM] = LN(CXL), +}; + +#define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \ + (1 << IBS_DATA_SRC_EXT_PMEM) | \ + (1 << IBS_DATA_SRC_EXT_EXT_MEM)) +#define ZEN4_RMT_NODE_APPLICABLE(x) (ZEN4_RMT_NODE_BITS & (1 << x)) + +static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3, + struct 
perf_sample_data *data) { union perf_mem_data_src *data_src = &data->data_src; u8 ibs_data_src = perf_ibs_data_src(op_data2); data_src->mem_lvl = 0; + data_src->mem_lvl_num = 0; /* * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached * memory accesses. So, check DcUcMemAcc bit early. */ - if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) { - data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT; - return; - } + if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) + return L(UNC) | LN(UNC); /* L1 Hit */ - if (op_data3->dc_miss == 0) { - data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; - return; - } + if (op_data3->dc_miss == 0) + return L(L1) | LN(L1); /* L2 Hit */ if (op_data3->l2_miss == 0) { /* Erratum #1293 */ if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || - !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { - data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; - return; - } + !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) + return L(L2) | LN(L2); } /* @@ -779,82 +804,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, if (data_src->mem_op != PERF_MEM_OP_LOAD) goto check_mab; - /* L3 Hit */ if (ibs_caps & IBS_CAPS_ZEN4) { - if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; - return; - } - } else { - if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 | - PERF_MEM_LVL_HIT; - return; - } - } + u64 val = g_zen4_data_src[ibs_data_src]; - /* A peer cache in a near CCX */ - if (ibs_caps & IBS_CAPS_ZEN4 && - ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; - return; - } + if (!val) + goto check_mab; - /* A peer cache in a far CCX */ - if (ibs_caps & IBS_CAPS_ZEN4) { - if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | 
PERF_MEM_LVL_HIT; - return; - } - } else { - if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; - return; + /* HOPS_1 because IBS doesn't provide remote socket detail */ + if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) { + if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) + val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1); + else + val |= REM | HOPS(1); } - } - /* DRAM */ - if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) { - if (op_data2->rmt_node == 0) - data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; - else - data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; - return; - } + return val; + } else { + u64 val = g_data_src[ibs_data_src]; - /* PMEM */ - if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) { - data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM; - if (op_data2->rmt_node) { - data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; - /* IBS doesn't provide Remote socket detail */ - data_src->mem_hops = PERF_MEM_HOPS_1; - } - return; - } + if (!val) + goto check_mab; - /* Extension Memory */ - if (ibs_caps & IBS_CAPS_ZEN4 && - ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) { - data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL; - if (op_data2->rmt_node) { - data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; - /* IBS doesn't provide Remote socket detail */ - data_src->mem_hops = PERF_MEM_HOPS_1; + /* HOPS_1 because IBS doesn't provide remote socket detail */ + if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) { + if (ibs_data_src == IBS_DATA_SRC_DRAM) + val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1); + else + val |= REM | HOPS(1); } - return; - } - /* IO */ - if (ibs_data_src == IBS_DATA_SRC_EXT_IO) { - data_src->mem_lvl = PERF_MEM_LVL_IO; - data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; - if (op_data2->rmt_node) { - data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; - /* IBS doesn't provide Remote socket detail */ - data_src->mem_hops = PERF_MEM_HOPS_1; - } - return; + return val; } 
check_mab: @@ -865,12 +844,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set * MAB only when IBS fails to provide DataSrc. */ - if (op_data3->dc_miss_no_mab_alloc) { - data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; - return; - } + if (op_data3->dc_miss_no_mab_alloc) + return L(LFB) | LN(LFB); - data_src->mem_lvl = PERF_MEM_LVL_NA; + /* Don't set HIT with NA */ + return PERF_MEM_S(LVL, NA) | LN(NA); } static bool perf_ibs_cache_hit_st_valid(void) @@ -960,7 +938,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, union ibs_op_data2 *op_data2, union ibs_op_data3 *op_data3) { - perf_ibs_get_mem_lvl(op_data2, op_data3, data); + union perf_mem_data_src *data_src = &data->data_src; + + data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data); perf_ibs_get_mem_snoop(op_data2, data); perf_ibs_get_tlb_lvl(op_data3, data); perf_ibs_get_mem_lock(op_data3, data); -- Gitee From 2886a83025aaed04ec982781d55d860586c40fcc Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Tue, 19 Mar 2024 16:22:11 +0800 Subject: [PATCH 5/5] anolis: perf mem/c2c: Add load store event mapping for Hygon ANBZ: #24134 Hygon supports perf mem/c2c as AMD Zen CPUs do, and uses the "ibs_op//" event as the mem-ldst event. Signed-off-by: Qi Liu --- tools/perf/Documentation/perf-c2c.txt | 6 +++--- tools/perf/arch/x86/util/env.c | 15 +++++++++++++++ tools/perf/arch/x86/util/env.h | 1 + tools/perf/arch/x86/util/mem-events.c | 2 +- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 5f7f55624b6f..0c47da963949 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -21,8 +21,8 @@ you to track down the cacheline contentions. On Intel, the tool is based on load latency and precise store facility events provided by Intel CPUs. 
On PowerPC, the tool uses random instruction sampling -with thresholding feature. On AMD, the tool uses IBS op pmu (due to hardware -limitations, perf c2c is not supported on Zen3 cpus). +with thresholding feature. On AMD and Hygon, the tool uses IBS op pmu (due to +hardware limitations, perf c2c is not supported on Zen3 cpus). These events provide: - memory address of the access @@ -142,7 +142,7 @@ default on Intel: cpu/mem-loads,ldlat=30/P cpu/mem-stores/P -following on AMD: +following on AMD and Hygon: ibs_op// diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c index 33b87f8ac1cc..bcd7b65f54d7 100644 --- a/tools/perf/arch/x86/util/env.c +++ b/tools/perf/arch/x86/util/env.c @@ -17,3 +17,18 @@ bool x86__is_amd_cpu(void) ret: return is_amd >= 1 ? true : false; } + +bool x86__is_hygon_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + static int is_hygon; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_hygon) + goto ret; + + perf_env__cpuid(&env); + is_hygon = env.cpuid && strstarts(env.cpuid, "HygonGenuine") ? 1 : -1; + perf_env__exit(&env); +ret: + return is_hygon >= 1 ? true : false; +} diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h index d78f080b6b3f..904d5e228360 100644 --- a/tools/perf/arch/x86/util/env.h +++ b/tools/perf/arch/x86/util/env.h @@ -3,5 +3,6 @@ #define _X86_ENV_H bool x86__is_amd_cpu(void); +bool x86__is_hygon_cpu(void); #endif /* _X86_ENV_H */ diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index efc0fae9ed0a..7fbace1587cb 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -32,7 +32,7 @@ struct perf_mem_event *perf_mem_events__ptr(int i) if (i >= PERF_MEM_EVENTS__MAX) return NULL; - if (x86__is_amd_cpu()) + if (x86__is_amd_cpu() || x86__is_hygon_cpu()) return &perf_mem_events_amd[i]; return &perf_mem_events_intel[i]; -- Gitee