From dd43a16cf45d9f62b81bcee7a3d6804987741872 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Tue, 3 May 2022 18:02:22 +0100
Subject: [PATCH 001/173] arm64/sysreg: Introduce helpers for access to sysreg
 fields

[Upstream commit e6a6b34f97efe3ded077b31f4370b4c1206c9e56]

The macros we define for the bitfields within sysregs have very regular
names, especially once we switch to automatic generation of those
macros. Take advantage of this to define wrappers around FIELD_PREP(),
allowing us to simplify setting values in fields either numerically

  SYS_FIELD_PREP(SCTLR_EL1, TCF0, 0x0)

or using the values of enumerations within the fields

  SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM)

Suggested-by: Mark Rutland
Signed-off-by: Mark Brown
Link: https://lore.kernel.org/r/20220503170233.507788-2-broonie@kernel.org
Signed-off-by: Catalin Marinas
Signed-off-by: huwentao
---
 arch/arm64/include/asm/sysreg.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 761b26417a5d..b61de43de7c5 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -924,4 +924,10 @@
 
 #endif
 
+#define SYS_FIELD_PREP(reg, field, val)		\
+	FIELD_PREP(reg##_##field##_MASK, val)
+
+#define SYS_FIELD_PREP_ENUM(reg, field, val)		\
+	FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+
 #endif /* __ASM_SYSREG_H */
--
Gitee

From 9452075f939c57f069cb97c3fb20d6bfbf7d6797 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Thu, 24 Feb 2022 11:06:54 +0530
Subject: [PATCH 002/173] perf: Add irq and exception return branch types

[Upstream commit cedd3614e5d9c80908099c19f8716714ce0610b1]

This expands generic branch type classification by adding two more
entries therein, i.e. irq and exception return. Also updates the x86
implementation to process X86_BR_IRET and X86_BR_IRQ records as
appropriate.

This changes branch types reported to user space on the x86 platform,
but it should not be a problem. The possible scenarios and impacts are
enumerated here.

--------------------------------------------------------------------------
| kernel | perf tool | Impact                                            |
--------------------------------------------------------------------------
| old    | old       | Works as before                                   |
--------------------------------------------------------------------------
| old    | new       | PERF_BR_UNKNOWN is processed                      |
--------------------------------------------------------------------------
| new    | old       | PERF_BR_ERET/PERF_BR_IRQ blocked via old          |
|        |           | PERF_BR_MAX                                       |
--------------------------------------------------------------------------
| new    | new       | PERF_BR_ERET/PERF_BR_IRQ are recognized           |
--------------------------------------------------------------------------
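As an illustration of the "new kernel, old perf tool" row, here is a
compressed sketch of the tool side decode path, modelled on the
branch_type_name() change below; the enum is abbreviated and main() is
only a test harness, not part of this patch:

  #include <stdio.h>

  /* Abbreviated copy of the uapi enum; intermediate entries elided */
  enum {
          PERF_BR_UNKNOWN = 0,    /* unknown */
          PERF_BR_ERET    = 11,   /* exception return */
          PERF_BR_IRQ     = 12,   /* irq */
          PERF_BR_MAX,
  };

  static const char *branch_type_name(int type)
  {
          static const char *const names[PERF_BR_MAX] = {
                  [PERF_BR_ERET] = "ERET",
                  [PERF_BR_IRQ]  = "IRQ",
          };

          /* A tool built with the older, smaller PERF_BR_MAX rejects 11/12 here */
          if (type >= 0 && type < PERF_BR_MAX && names[type])
                  return names[type];
          return "N/A";
  }

  int main(void)
  {
          printf("%s %s\n", branch_type_name(PERF_BR_ERET),
                 branch_type_name(PERF_BR_IRQ));
          return 0;
  }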
Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1645681014-3346-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: huwentao --- arch/x86/events/intel/lbr.c | 4 ++-- include/uapi/linux/perf_event.h | 2 ++ tools/include/uapi/linux/perf_event.h | 2 ++ tools/perf/util/branch.c | 4 +++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index aaf3852d26da..269bc9c327fd 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1267,10 +1267,10 @@ static int branch_map[X86_BR_TYPE_MAP_MAX] = { PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ PERF_BR_SYSRET, /* X86_BR_SYSRET */ PERF_BR_UNKNOWN, /* X86_BR_INT */ - PERF_BR_UNKNOWN, /* X86_BR_IRET */ + PERF_BR_ERET, /* X86_BR_IRET */ PERF_BR_COND, /* X86_BR_JCC */ PERF_BR_UNCOND, /* X86_BR_JMP */ - PERF_BR_UNKNOWN, /* X86_BR_IRQ */ + PERF_BR_IRQ, /* X86_BR_IRQ */ PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ PERF_BR_UNKNOWN, /* X86_BR_ABORT */ PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 81c8bfa18e90..383073f5ce95 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -235,6 +235,8 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ PERF_BR_MAX, }; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0eeab4d13bad..59a1c0172470 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -250,6 +250,8 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ PERF_BR_MAX, }; diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 2285b1eb3128..a9a909db8cc7 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -49,7 +49,9 @@ const char *branch_type_name(int type) "SYSCALL", "SYSRET", "COND_CALL", - "COND_RET" + "COND_RET", + "ERET", + "IRQ" }; if (type >= 0 && type < PERF_BR_MAX) -- Gitee From 031cb65b0f948a7102f5476e4c79456717ddf6bb Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:15 +0530 Subject: [PATCH 003/173] perf: Add system error and not in transaction branch types [Upstream commit a724ec82966d57e4b5d36341d3e3dc1a3c011564] This expands generic branch type classification by adding two more entries there in i.e system error and not in transaction. This also updates the x86 implementation to process X86_BR_NO_TX records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. 
--------------------------------------------------------------------------
| kernel | perf tool | Impact                                            |
--------------------------------------------------------------------------
| old    | old       | Works as before                                   |
--------------------------------------------------------------------------
| old    | new       | PERF_BR_UNKNOWN is processed                      |
--------------------------------------------------------------------------
| new    | old       | PERF_BR_NO_TX is blocked via old PERF_BR_MAX      |
--------------------------------------------------------------------------
| new    | new       | PERF_BR_NO_TX is recognized                       |
--------------------------------------------------------------------------

When PERF_BR_NO_TX is blocked via the old PERF_BR_MAX (new kernel with
old perf tool), user space might throw up a warning complaining about
unrecognized branch types being reported, but that is expected.

The PERF_BR_SERROR and PERF_BR_NO_TX branch types will be used for the
BRBE implementation on the arm64 platform. PERF_BR_NO_TX complements the
'abort' and 'in_tx' elements in perf_branch_entry, which represent other
transaction states for a given branch record, thereby completing the
transaction state classification.

Signed-off-by: Anshuman Khandual
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: James Clark
Link: https://lkml.kernel.org/r/20220824044822.70230-2-anshuman.khandual@arm.com
Signed-off-by: huwentao
---
 include/uapi/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 383073f5ce95..7db8e3d13871 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -237,6 +237,8 @@ enum {
 	PERF_BR_COND_RET	= 10,	/* conditional function return */
 	PERF_BR_ERET		= 11,	/* exception return */
 	PERF_BR_IRQ		= 12,	/* irq */
+	PERF_BR_SERROR		= 13,	/* system error */
+	PERF_BR_NO_TX		= 14,	/* not in transaction */
 	PERF_BR_MAX,
 };
 
--
Gitee

From 24042d8c9984d21920167d3bcc6bef4d5cc75feb Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Wed, 24 Aug 2022 10:18:16 +0530
Subject: [PATCH 004/173] perf: Extend branch type classification

[Upstream commit b190bc4ac9e6d9763b61654c5a0c085ff77d7a09]

branch_entry.type has now run out of space to accommodate more branch
type classifications. This will prevent the perf branch stack
implementation on arm64 (via BRBE) from capturing all available branch
types. Extending this bit field, i.e. branch_entry.type [4 bits], is not
an option as it will break the user space ABI for both little and big
endian perf tools.

Extend branch classification with a new field, branch_entry.new_type,
via a new branch type PERF_BR_EXTEND_ABI in branch_entry.type. Perf
tools which can decode PERF_BR_EXTEND_ABI will then parse
branch_entry.new_type as well.

branch_entry.new_type is a 4 bit field which can hold up to 16 branch
types. The first three branch types will hold various generic faults,
followed by five architecture specific branch types, which can be
overridden by the platform for specific use cases. These architecture
specific branch types get overridden on the arm64 platform for the BRBE
implementation.
New generic branch types - PERF_BR_NEW_FAULT_ALGN - PERF_BR_NEW_FAULT_DATA - PERF_BR_NEW_FAULT_INST New arch specific branch types - PERF_BR_NEW_ARCH_1 - PERF_BR_NEW_ARCH_2 - PERF_BR_NEW_ARCH_3 - PERF_BR_NEW_ARCH_4 - PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-3-anshuman.khandual@arm.com Signed-off-by: huwentao --- include/uapi/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7db8e3d13871..46ffd216866d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -239,9 +239,22 @@ enum { PERF_BR_IRQ = 12, /* irq */ PERF_BR_SERROR = 13, /* system error */ PERF_BR_NO_TX = 14, /* not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* extend ABI */ PERF_BR_MAX, }; +enum { + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1227,7 +1240,8 @@ struct perf_branch_entry { abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ type:4, /* branch type */ - reserved:40; + new_type:4, /* additional branch type */ + reserved:34; }; union perf_sample_weight { -- Gitee From 8d9ad21c4552e930b246a07497ce06205c020df6 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:17 +0530 Subject: [PATCH 005/173] perf: Capture branch privilege information [Upstream commit 5402d25aa5710d240040f73fb13d7d5c303ef071] Platforms like arm64 could capture privilege level information for all the branch records. Hence this adds a new element in the struct branch_entry to record the privilege level information, which could be requested through a new event.attr.branch_sample_type based flag PERF_SAMPLE_BRANCH_PRIV_SAVE. This flag helps user choose whether privilege information is captured. 
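For illustration, a minimal user-space sketch of requesting and decoding
the new privilege information; this is not part of the patch, assumes
the updated uapi header, and elides all error handling:

  #include <linux/perf_event.h>
  #include <string.h>

  static void setup_branch_priv(struct perf_event_attr *attr)
  {
          memset(attr, 0, sizeof(*attr));
          attr->size = sizeof(*attr);
          attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
          attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                     PERF_SAMPLE_BRANCH_PRIV_SAVE;
  }

  /* Decode the per-record privilege level captured by the PMU */
  static const char *priv_name(const struct perf_branch_entry *e)
  {
          switch (e->priv) {
          case PERF_BR_PRIV_USER:         return "user";
          case PERF_BR_PRIV_KERNEL:       return "kernel";
          case PERF_BR_PRIV_HV:           return "hv";
          default:                        return "unknown";
          }
  }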
Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-4-anshuman.khandual@arm.com Signed-off-by: huwentao --- include/uapi/linux/perf_event.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 46ffd216866d..8978945e8ae3 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -188,6 +188,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -217,6 +219,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -255,6 +259,13 @@ enum { PERF_BR_NEW_MAX, }; +enum { + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1241,7 +1252,8 @@ struct perf_branch_entry { cycles:16, /* cycle count to last branch */ type:4, /* branch type */ new_type:4, /* additional branch type */ - reserved:34; + priv:3, /* privilege level */ + reserved:31; }; union perf_sample_weight { -- Gitee From 77a90e463bf4ce7f88f6eabae47d49142dec3b2b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:18 +0530 Subject: [PATCH 006/173] perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform [Upstream commit f4054e522531038354bea5c924f286fdd8ae77b5] BRBE captured branch types will overflow perf_branch_entry.type and generic branch types in perf_branch_entry.new_type. So override each available arch specific branch type in the following manner to comprehensively process all reported branch types in BRBE. 
PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-5-anshuman.khandual@arm.com Signed-off-by: huwentao --- include/uapi/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8978945e8ae3..f7fb6c83e030 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -266,6 +266,12 @@ enum { PERF_BR_PRIV_HV = 3, }; +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ -- Gitee From b4140648fc064444dfa59c658dc04ed413f938bc Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 6 Sep 2022 14:14:14 +0530 Subject: [PATCH 007/173] perf: Consolidate branch sample filter helpers [Upstream commit 03b02db93be407103c385814033633364674a6f6] Besides the branch type filtering requests, 'event.attr.branch_sample_type' also contains various flags indicating which additional information should be captured, along with the base branch record. These flags help configure the underlying hardware, and capture the branch records appropriately when required e.g after PMU interrupt. But first, this moves an existing helper perf_sample_save_hw_index() into the header before adding some more helpers for other branch sample filter flags. 
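As a sketch of how a PMU driver is expected to consume these helpers
when configuring its branch record hardware, once this patch is applied;
the MY_PMU_* bits and the function below are hypothetical stand-ins, not
any real driver's interface:

  #include <linux/bits.h>
  #include <linux/perf_event.h>

  /* Hypothetical hardware configuration bits, for illustration only */
  #define MY_PMU_BR_CYCLES        BIT(0)
  #define MY_PMU_BR_FLAGS         BIT(1)
  #define MY_PMU_BR_PRIV          BIT(2)

  static u64 my_pmu_branch_config(const struct perf_event *event)
  {
          u64 cfg = 0;

          /* Capture cycle counts and flags unless the event opted out */
          if (!branch_sample_no_cycles(event))
                  cfg |= MY_PMU_BR_CYCLES;
          if (!branch_sample_no_flags(event))
                  cfg |= MY_PMU_BR_FLAGS;

          /* Capture the privilege level only when explicitly requested */
          if (branch_sample_priv(event))
                  cfg |= MY_PMU_BR_PRIV;

          return cfg;
  }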
Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220906084414.396220-1-anshuman.khandual@arm.com Signed-off-by: huwentao --- include/linux/perf_event.h | 26 ++++++++++++++++++++++++++ kernel/events/core.c | 9 ++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7f6f174f481d..e6173d37803d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1577,4 +1577,30 @@ int perf_event_exit_cpu(unsigned int cpu); #define perf_event_exit_cpu NULL #endif +#ifdef CONFIG_PERF_EVENTS +static inline bool branch_sample_no_flags(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS; +} + +static inline bool branch_sample_no_cycles(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES; +} + +static inline bool branch_sample_type(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE; +} + +static inline bool branch_sample_hw_index(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + +static inline bool branch_sample_priv(const struct perf_event *event) +{ + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; +} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_PERF_EVENT_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index a3270243c917..a5d71b20f67d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6563,11 +6563,6 @@ static void perf_output_read(struct perf_output_handle *handle, perf_output_read_one(handle, event, enabled, running); } -static inline bool perf_sample_save_hw_index(struct perf_event *event) -{ - return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; -} - void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, @@ -6656,7 +6651,7 @@ void perf_output_sample(struct perf_output_handle *handle, * sizeof(struct perf_branch_entry); perf_output_put(handle, data->br_stack->nr); - if (perf_sample_save_hw_index(event)) + if (branch_sample_hw_index(event)) perf_output_put(handle, data->br_stack->hw_idx); perf_output_copy(handle, data->br_stack->entries, size); } else { @@ -6951,7 +6946,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_BRANCH_STACK) { int size = sizeof(u64); /* nr */ if (data->br_stack) { - if (perf_sample_save_hw_index(event)) + if (branch_sample_hw_index(event)) size += sizeof(u64); size += data->br_stack->nr -- Gitee From ad6b3372fe2beeae0069ebca4188ded5e4686be3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 5 Dec 2022 12:14:43 +0530 Subject: [PATCH 008/173] perf record: Add remaining branch filters: "no_cycles", "no_flags" & "hw_index" [Upstream commit 955f6def5590ce6ca11a1c1ced0d2d1c95421059] This adds all remaining branch filters i.e "no_cycles", "no_flags" and "hw_index". While here, also updates the documentation. 
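With these entries in place, a request such as
'perf record -j any,no_flags,no_cycles,hw_index ...' resolves through
the branch_modes[] table; a compressed paraphrase of that lookup, with
parse_one() being an illustrative wrapper rather than the tool's real
entry point:

  #include <linux/perf_event.h>
  #include <string.h>

  struct branch_mode {
          const char *name;
          int mode;
  };

  #define BRANCH_OPT(n, m) { .name = n, .mode = (m) }

  /* Abbreviated copy of the branch_modes[] table this patch extends */
  static const struct branch_mode branch_modes[] = {
          BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS),
          BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES),
          BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
          { NULL, 0 },
  };

  /* e.g. parse_one("hw_index") returns PERF_SAMPLE_BRANCH_HW_INDEX */
  static int parse_one(const char *tok)
  {
          const struct branch_mode *bm;

          for (bm = branch_modes; bm->name; bm++) {
                  if (!strcmp(bm->name, tok))
                          return bm->mode;
          }
          return 0;
  }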
Signed-off-by: Anshuman Khandual Cc: James Clark Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20221205064443.533587-1-anshuman.khandual@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/Documentation/perf-record.txt | 5 +++++ tools/perf/util/parse-branch-options.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5a666750bd4b..4aecca8c1610 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -370,6 +370,7 @@ following filters are defined: - any_call: any function call or system call - any_ret: any function return or system call return - ind_call: any indirect branch + - ind_jmp: any indirect jump - call: direct calls, including far (to/from kernel) calls - u: only when the branch target is at the user level - k: only when the branch target is in the kernel @@ -378,6 +379,10 @@ following filters are defined: - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort - cond: conditional branches + - call_stack: save call stack + - no_flags: don't save branch flags e.g prediction, misprediction etc + - no_cycles: don't save branch cycles + - hw_index: save branch hardware index - save_type: save branch type during sampling in case binary is not available later + diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index bb4aa88c50a8..28696f8be6da 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -30,8 +30,11 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), + BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS), + BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES), BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), + BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_END }; -- Gitee From c8ea7d0a2be1b594c6045237f47a5ed79a85c537 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 8 Nov 2023 10:38:37 +0800 Subject: [PATCH 009/173] arm64/sysreg: Add BRBE registers and fields [Upstream commit 52e4a56ab8b85a11ffc017eaec5b2f38642a43ba] This adds BRBE related register definitions and various other related field macros there in. These will be used subsequently in a BRBE driver which is being added later on. 
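Since every field macro follows the regular <REG>_<FIELD>_* naming,
these definitions compose with the SYS_FIELD_PREP()/SYS_FIELD_PREP_ENUM()
helpers introduced in the first patch of this series. A minimal sketch,
where the function itself is illustrative:

  #include <linux/bitfield.h>
  #include <linux/types.h>
  #include <asm/sysreg.h>

  static u64 brbe_example_brbcr(void)
  {
          u64 brbcr = 0;

          /* Request the physical timestamp for captured branch records */
          brbcr |= SYS_FIELD_PREP_ENUM(BRBCR_ELx, TS, PHYSICAL);

          /* Enable branch recording at EL0 */
          brbcr |= SYS_FIELD_PREP(BRBCR_ELx, E0BRE, 1);

          return brbcr;
  }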
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Mark Brown Signed-off-by: Anshuman Khandual Signed-off-by: huwentao --- arch/arm64/include/asm/sysreg.h | 497 ++++++++++++++++++++++++++++++++ 1 file changed, 497 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b61de43de7c5..ba551850daa4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -130,6 +130,109 @@ #define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) +#define __SYS_BRBINF(n) sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 0)) +#define __SYS_BRBSRC(n) sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 1)) +#define __SYS_BRBTGT(n) sys_reg(2, 1, 8, ((n) & 0xf), ((((n) & 0x10) >> 2) + 2)) + +#define SYS_BRBINF0_EL1 __SYS_BRBINF(0) +#define SYS_BRBINF1_EL1 __SYS_BRBINF(1) +#define SYS_BRBINF2_EL1 __SYS_BRBINF(2) +#define SYS_BRBINF3_EL1 __SYS_BRBINF(3) +#define SYS_BRBINF4_EL1 __SYS_BRBINF(4) +#define SYS_BRBINF5_EL1 __SYS_BRBINF(5) +#define SYS_BRBINF6_EL1 __SYS_BRBINF(6) +#define SYS_BRBINF7_EL1 __SYS_BRBINF(7) +#define SYS_BRBINF8_EL1 __SYS_BRBINF(8) +#define SYS_BRBINF9_EL1 __SYS_BRBINF(9) +#define SYS_BRBINF10_EL1 __SYS_BRBINF(10) +#define SYS_BRBINF11_EL1 __SYS_BRBINF(11) +#define SYS_BRBINF12_EL1 __SYS_BRBINF(12) +#define SYS_BRBINF13_EL1 __SYS_BRBINF(13) +#define SYS_BRBINF14_EL1 __SYS_BRBINF(14) +#define SYS_BRBINF15_EL1 __SYS_BRBINF(15) +#define SYS_BRBINF16_EL1 __SYS_BRBINF(16) +#define SYS_BRBINF17_EL1 __SYS_BRBINF(17) +#define SYS_BRBINF18_EL1 __SYS_BRBINF(18) +#define SYS_BRBINF19_EL1 __SYS_BRBINF(19) +#define SYS_BRBINF20_EL1 __SYS_BRBINF(20) +#define SYS_BRBINF21_EL1 __SYS_BRBINF(21) +#define SYS_BRBINF22_EL1 __SYS_BRBINF(22) +#define SYS_BRBINF23_EL1 __SYS_BRBINF(23) +#define SYS_BRBINF24_EL1 __SYS_BRBINF(24) +#define SYS_BRBINF25_EL1 __SYS_BRBINF(25) +#define SYS_BRBINF26_EL1 __SYS_BRBINF(26) +#define SYS_BRBINF27_EL1 __SYS_BRBINF(27) +#define SYS_BRBINF28_EL1 __SYS_BRBINF(28) +#define SYS_BRBINF29_EL1 __SYS_BRBINF(29) +#define SYS_BRBINF30_EL1 __SYS_BRBINF(30) +#define SYS_BRBINF31_EL1 __SYS_BRBINF(31) + +#define SYS_BRBSRC0_EL1 __SYS_BRBSRC(0) +#define SYS_BRBSRC1_EL1 __SYS_BRBSRC(1) +#define SYS_BRBSRC2_EL1 __SYS_BRBSRC(2) +#define SYS_BRBSRC3_EL1 __SYS_BRBSRC(3) +#define SYS_BRBSRC4_EL1 __SYS_BRBSRC(4) +#define SYS_BRBSRC5_EL1 __SYS_BRBSRC(5) +#define SYS_BRBSRC6_EL1 __SYS_BRBSRC(6) +#define SYS_BRBSRC7_EL1 __SYS_BRBSRC(7) +#define SYS_BRBSRC8_EL1 __SYS_BRBSRC(8) +#define SYS_BRBSRC9_EL1 __SYS_BRBSRC(9) +#define SYS_BRBSRC10_EL1 __SYS_BRBSRC(10) +#define SYS_BRBSRC11_EL1 __SYS_BRBSRC(11) +#define SYS_BRBSRC12_EL1 __SYS_BRBSRC(12) +#define SYS_BRBSRC13_EL1 __SYS_BRBSRC(13) +#define SYS_BRBSRC14_EL1 __SYS_BRBSRC(14) +#define SYS_BRBSRC15_EL1 __SYS_BRBSRC(15) +#define SYS_BRBSRC16_EL1 __SYS_BRBSRC(16) +#define SYS_BRBSRC17_EL1 __SYS_BRBSRC(17) +#define SYS_BRBSRC18_EL1 __SYS_BRBSRC(18) +#define SYS_BRBSRC19_EL1 __SYS_BRBSRC(19) +#define SYS_BRBSRC20_EL1 __SYS_BRBSRC(20) +#define SYS_BRBSRC21_EL1 __SYS_BRBSRC(21) +#define SYS_BRBSRC22_EL1 __SYS_BRBSRC(22) +#define SYS_BRBSRC23_EL1 __SYS_BRBSRC(23) +#define SYS_BRBSRC24_EL1 __SYS_BRBSRC(24) +#define SYS_BRBSRC25_EL1 __SYS_BRBSRC(25) +#define SYS_BRBSRC26_EL1 __SYS_BRBSRC(26) +#define SYS_BRBSRC27_EL1 __SYS_BRBSRC(27) +#define SYS_BRBSRC28_EL1 __SYS_BRBSRC(28) +#define SYS_BRBSRC29_EL1 __SYS_BRBSRC(29) +#define SYS_BRBSRC30_EL1 
__SYS_BRBSRC(30) +#define SYS_BRBSRC31_EL1 __SYS_BRBSRC(31) + +#define SYS_BRBTGT0_EL1 __SYS_BRBTGT(0) +#define SYS_BRBTGT1_EL1 __SYS_BRBTGT(1) +#define SYS_BRBTGT2_EL1 __SYS_BRBTGT(2) +#define SYS_BRBTGT3_EL1 __SYS_BRBTGT(3) +#define SYS_BRBTGT4_EL1 __SYS_BRBTGT(4) +#define SYS_BRBTGT5_EL1 __SYS_BRBTGT(5) +#define SYS_BRBTGT6_EL1 __SYS_BRBTGT(6) +#define SYS_BRBTGT7_EL1 __SYS_BRBTGT(7) +#define SYS_BRBTGT8_EL1 __SYS_BRBTGT(8) +#define SYS_BRBTGT9_EL1 __SYS_BRBTGT(9) +#define SYS_BRBTGT10_EL1 __SYS_BRBTGT(10) +#define SYS_BRBTGT11_EL1 __SYS_BRBTGT(11) +#define SYS_BRBTGT12_EL1 __SYS_BRBTGT(12) +#define SYS_BRBTGT13_EL1 __SYS_BRBTGT(13) +#define SYS_BRBTGT14_EL1 __SYS_BRBTGT(14) +#define SYS_BRBTGT15_EL1 __SYS_BRBTGT(15) +#define SYS_BRBTGT16_EL1 __SYS_BRBTGT(16) +#define SYS_BRBTGT17_EL1 __SYS_BRBTGT(17) +#define SYS_BRBTGT18_EL1 __SYS_BRBTGT(18) +#define SYS_BRBTGT19_EL1 __SYS_BRBTGT(19) +#define SYS_BRBTGT20_EL1 __SYS_BRBTGT(20) +#define SYS_BRBTGT21_EL1 __SYS_BRBTGT(21) +#define SYS_BRBTGT22_EL1 __SYS_BRBTGT(22) +#define SYS_BRBTGT23_EL1 __SYS_BRBTGT(23) +#define SYS_BRBTGT24_EL1 __SYS_BRBTGT(24) +#define SYS_BRBTGT25_EL1 __SYS_BRBTGT(25) +#define SYS_BRBTGT26_EL1 __SYS_BRBTGT(26) +#define SYS_BRBTGT27_EL1 __SYS_BRBTGT(27) +#define SYS_BRBTGT28_EL1 __SYS_BRBTGT(28) +#define SYS_BRBTGT29_EL1 __SYS_BRBTGT(29) +#define SYS_BRBTGT30_EL1 __SYS_BRBTGT(30) +#define SYS_BRBTGT31_EL1 __SYS_BRBTGT(31) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -403,6 +506,396 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +/*** Branch Record Buffer Extension ***/ +/* ID registers */ +#define BRBINFx_EL1_CCU GENMASK(46, 46) +#define BRBINFx_EL1_CCU_MASK GENMASK(46, 46) +#define BRBINFx_EL1_CCU_SHIFT 46 +#define BRBINFx_EL1_CCU_WIDTH 1 + +#define BRBINFx_EL1_CC GENMASK(45, 32) +#define BRBINFx_EL1_CC_MASK GENMASK(45, 32) +#define BRBINFx_EL1_CC_SHIFT 32 +#define BRBINFx_EL1_CC_WIDTH 14 + +#define BRBINFx_EL1_LASTFAILED GENMASK(17, 17) +#define BRBINFx_EL1_LASTFAILED_MASK GENMASK(17, 17) +#define BRBINFx_EL1_LASTFAILED_SHIFT 17 +#define BRBINFx_EL1_LASTFAILED_WIDTH 1 + +#define BRBINFx_EL1_T GENMASK(16, 16) +#define BRBINFx_EL1_T_MASK GENMASK(16, 16) +#define BRBINFx_EL1_T_SHIFT 16 +#define BRBINFx_EL1_T_WIDTH 1 + +#define BRBINFx_EL1_TYPE GENMASK(13, 8) +#define BRBINFx_EL1_TYPE_MASK GENMASK(13, 8) +#define BRBINFx_EL1_TYPE_SHIFT 8 +#define BRBINFx_EL1_TYPE_WIDTH 6 +#define BRBINFx_EL1_TYPE_UNCOND_DIRECT UL(0b000000) +#define BRBINFx_EL1_TYPE_INDIRECT UL(0b000001) +#define BRBINFx_EL1_TYPE_DIRECT_LINK UL(0b000010) +#define BRBINFx_EL1_TYPE_INDIRECT_LINK UL(0b000011) +#define BRBINFx_EL1_TYPE_RET UL(0b000101) +#define BRBINFx_EL1_TYPE_ERET UL(0b000111) +#define BRBINFx_EL1_TYPE_COND_DIRECT UL(0b001000) +#define BRBINFx_EL1_TYPE_DEBUG_HALT UL(0b100001) +#define BRBINFx_EL1_TYPE_CALL UL(0b100010) +#define BRBINFx_EL1_TYPE_TRAP UL(0b100011) +#define BRBINFx_EL1_TYPE_SERROR UL(0b100100) +#define BRBINFx_EL1_TYPE_INSN_DEBUG UL(0b100110) +#define BRBINFx_EL1_TYPE_DATA_DEBUG UL(0b100111) +#define BRBINFx_EL1_TYPE_ALIGN_FAULT UL(0b101010) +#define BRBINFx_EL1_TYPE_INSN_FAULT UL(0b101011) +#define BRBINFx_EL1_TYPE_DATA_FAULT UL(0b101100) +#define BRBINFx_EL1_TYPE_IRQ UL(0b101110) +#define BRBINFx_EL1_TYPE_FIQ UL(0b101111) +#define BRBINFx_EL1_TYPE_DEBUG_EXIT UL(0b111001) + +#define BRBINFx_EL1_EL GENMASK(7, 6) +#define BRBINFx_EL1_EL_MASK GENMASK(7, 6) +#define 
BRBINFx_EL1_EL_SHIFT 6 +#define BRBINFx_EL1_EL_WIDTH 2 +#define BRBINFx_EL1_EL_EL0 UL(0b00) +#define BRBINFx_EL1_EL_EL1 UL(0b01) +#define BRBINFx_EL1_EL_EL2 UL(0b10) +#define BRBINFx_EL1_EL_EL3 UL(0b11) + +#define BRBINFx_EL1_MPRED GENMASK(5, 5) +#define BRBINFx_EL1_MPRED_MASK GENMASK(5, 5) +#define BRBINFx_EL1_MPRED_SHIFT 5 +#define BRBINFx_EL1_MPRED_WIDTH 1 + +#define BRBINFx_EL1_VALID GENMASK(1, 0) +#define BRBINFx_EL1_VALID_MASK GENMASK(1, 0) +#define BRBINFx_EL1_VALID_SHIFT 0 +#define BRBINFx_EL1_VALID_WIDTH 2 +#define BRBINFx_EL1_VALID_NONE UL(0b00) +#define BRBINFx_EL1_VALID_TARGET UL(0b01) +#define BRBINFx_EL1_VALID_SOURCE UL(0b10) +#define BRBINFx_EL1_VALID_FULL UL(0b11) + +#define BRBINFx_EL1_RES0 (UL(0) | GENMASK_ULL(63, 47) | \ + GENMASK_ULL(31, 18) | \ + GENMASK_ULL(15, 14) | \ + GENMASK_ULL(4, 2)) +#define BRBINFx_EL1_RES1 (UL(0)) +#define BRBINFx_EL1_UNKN (UL(0)) + +#define BRBCR_ELx_EXCEPTION GENMASK(23, 23) +#define BRBCR_ELx_EXCEPTION_MASK GENMASK(23, 23) +#define BRBCR_ELx_EXCEPTION_SHIFT 23 +#define BRBCR_ELx_EXCEPTION_WIDTH 1 + +#define BRBCR_ELx_ERTN GENMASK(22, 22) +#define BRBCR_ELx_ERTN_MASK GENMASK(22, 22) +#define BRBCR_ELx_ERTN_SHIFT 22 +#define BRBCR_ELx_ERTN_WIDTH 1 + +#define BRBCR_ELx_FZP GENMASK(8, 8) +#define BRBCR_ELx_FZP_MASK GENMASK(8, 8) +#define BRBCR_ELx_FZP_SHIFT 8 +#define BRBCR_ELx_FZP_WIDTH 1 + +#define BRBCR_ELx_TS GENMASK(6, 5) +#define BRBCR_ELx_TS_MASK GENMASK(6, 5) +#define BRBCR_ELx_TS_SHIFT 5 +#define BRBCR_ELx_TS_WIDTH 2 +#define BRBCR_ELx_TS_VIRTUAL UL(0b01) +#define BRBCR_ELx_TS_GUEST_PHYSICAL UL(0b10) +#define BRBCR_ELx_TS_PHYSICAL UL(0b11) + +#define BRBCR_ELx_MPRED GENMASK(4, 4) +#define BRBCR_ELx_MPRED_MASK GENMASK(4, 4) +#define BRBCR_ELx_MPRED_SHIFT 4 +#define BRBCR_ELx_MPRED_WIDTH 1 + +#define BRBCR_ELx_CC GENMASK(3, 3) +#define BRBCR_ELx_CC_MASK GENMASK(3, 3) +#define BRBCR_ELx_CC_SHIFT 3 +#define BRBCR_ELx_CC_WIDTH 1 + +#define BRBCR_ELx_ExBRE GENMASK(1, 1) +#define BRBCR_ELx_ExBRE_MASK GENMASK(1, 1) +#define BRBCR_ELx_ExBRE_SHIFT 1 +#define BRBCR_ELx_ExBRE_WIDTH 1 + +#define BRBCR_ELx_E0BRE GENMASK(0, 0) +#define BRBCR_ELx_E0BRE_MASK GENMASK(0, 0) +#define BRBCR_ELx_E0BRE_SHIFT 0 +#define BRBCR_ELx_E0BRE_WIDTH 1 + +#define BRBCR_ELx_RES0 (UL(0) | GENMASK_ULL(63, 24) | \ + GENMASK_ULL(21, 9) | \ + GENMASK_ULL(7, 7) | \ + GENMASK_ULL(2, 2)) +#define BRBCR_ELx_RES1 (UL(0)) +#define BRBCR_ELx_UNKN (UL(0)) + +#define REG_BRBCR_EL2 S2_4_C9_C0_0 +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) +#define SYS_BRBCR_EL2_Op0 2 +#define SYS_BRBCR_EL2_Op1 4 +#define SYS_BRBCR_EL2_CRn 9 +#define SYS_BRBCR_EL2_CRm 0 +#define SYS_BRBCR_EL2_Op2 0 + +/* For BRBCR_EL2 fields see BRBCR_ELx */ + +#define REG_BRBCR_EL1 S2_1_C9_C0_0 +#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) +#define SYS_BRBCR_EL1_Op0 2 +#define SYS_BRBCR_EL1_Op1 1 +#define SYS_BRBCR_EL1_CRn 9 +#define SYS_BRBCR_EL1_CRm 0 +#define SYS_BRBCR_EL1_Op2 0 + +/* For BRBCR_EL1 fields see BRBCR_ELx */ + +#define REG_BRBCR_EL12 S2_5_C9_C0_0 +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) +#define SYS_BRBCR_EL12_Op0 2 +#define SYS_BRBCR_EL12_Op1 5 +#define SYS_BRBCR_EL12_CRn 9 +#define SYS_BRBCR_EL12_CRm 0 +#define SYS_BRBCR_EL12_Op2 0 + +/* For BRBCR_EL12 fields see BRBCR_ELx */ + +#define REG_BRBFCR_EL1 S2_1_C9_C0_1 +#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) +#define SYS_BRBFCR_EL1_Op0 2 +#define SYS_BRBFCR_EL1_Op1 1 +#define SYS_BRBFCR_EL1_CRn 9 +#define SYS_BRBFCR_EL1_CRm 0 +#define SYS_BRBFCR_EL1_Op2 1 + +#define BRBFCR_EL1_BANK GENMASK(29, 28) +#define BRBFCR_EL1_BANK_MASK GENMASK(29, 
28) +#define BRBFCR_EL1_BANK_SHIFT 28 +#define BRBFCR_EL1_BANK_WIDTH 2 +#define BRBFCR_EL1_BANK_FIRST UL(0b0) +#define BRBFCR_EL1_BANK_SECOND UL(0b1) + +#define BRBFCR_EL1_CONDDIR GENMASK(22, 22) +#define BRBFCR_EL1_CONDDIR_MASK GENMASK(22, 22) +#define BRBFCR_EL1_CONDDIR_SHIFT 22 +#define BRBFCR_EL1_CONDDIR_WIDTH 1 + +#define BRBFCR_EL1_DIRCALL GENMASK(21, 21) +#define BRBFCR_EL1_DIRCALL_MASK GENMASK(21, 21) +#define BRBFCR_EL1_DIRCALL_SHIFT 21 +#define BRBFCR_EL1_DIRCALL_WIDTH 1 + +#define BRBFCR_EL1_INDCALL GENMASK(20, 20) +#define BRBFCR_EL1_INDCALL_MASK GENMASK(20, 20) +#define BRBFCR_EL1_INDCALL_SHIFT 20 +#define BRBFCR_EL1_INDCALL_WIDTH 1 + +#define BRBFCR_EL1_RTN GENMASK(19, 19) +#define BRBFCR_EL1_RTN_MASK GENMASK(19, 19) +#define BRBFCR_EL1_RTN_SHIFT 19 +#define BRBFCR_EL1_RTN_WIDTH 1 + +#define BRBFCR_EL1_INDIRECT GENMASK(18, 18) +#define BRBFCR_EL1_INDIRECT_MASK GENMASK(18, 18) +#define BRBFCR_EL1_INDIRECT_SHIFT 18 +#define BRBFCR_EL1_INDIRECT_WIDTH 1 + +#define BRBFCR_EL1_DIRECT GENMASK(17, 17) +#define BRBFCR_EL1_DIRECT_MASK GENMASK(17, 17) +#define BRBFCR_EL1_DIRECT_SHIFT 17 +#define BRBFCR_EL1_DIRECT_WIDTH 1 + +#define BRBFCR_EL1_EnI GENMASK(16, 16) +#define BRBFCR_EL1_EnI_MASK GENMASK(16, 16) +#define BRBFCR_EL1_EnI_SHIFT 16 +#define BRBFCR_EL1_EnI_WIDTH 1 + +#define BRBFCR_EL1_PAUSED GENMASK(7, 7) +#define BRBFCR_EL1_PAUSED_MASK GENMASK(7, 7) +#define BRBFCR_EL1_PAUSED_SHIFT 7 +#define BRBFCR_EL1_PAUSED_WIDTH 1 + +#define BRBFCR_EL1_LASTFAILED GENMASK(6, 6) +#define BRBFCR_EL1_LASTFAILED_MASK GENMASK(6, 6) +#define BRBFCR_EL1_LASTFAILED_SHIFT 6 +#define BRBFCR_EL1_LASTFAILED_WIDTH 1 + +#define BRBFCR_EL1_RES0 (UL(0) | GENMASK_ULL(63, 30) | \ + GENMASK_ULL(27, 23) | \ + GENMASK_ULL(15, 8) | \ + GENMASK_ULL(5, 0)) +#define BRBFCR_EL1_RES1 (UL(0)) +#define BRBFCR_EL1_UNKN (UL(0)) + +#define REG_BRBTS_EL1 S2_1_C9_C0_2 +#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) +#define SYS_BRBTS_EL1_Op0 2 +#define SYS_BRBTS_EL1_Op1 1 +#define SYS_BRBTS_EL1_CRn 9 +#define SYS_BRBTS_EL1_CRm 0 +#define SYS_BRBTS_EL1_Op2 2 + +#define BRBTS_EL1_TS GENMASK(63, 0) +#define BRBTS_EL1_TS_MASK GENMASK(63, 0) +#define BRBTS_EL1_TS_SHIFT 0 +#define BRBTS_EL1_TS_WIDTH 64 + +#define BRBTS_EL1_RES0 (UL(0)) +#define BRBTS_EL1_RES1 (UL(0)) +#define BRBTS_EL1_UNKN (UL(0)) + +#define REG_BRBINFINJ_EL1 S2_1_C9_C1_0 +#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) +#define SYS_BRBINFINJ_EL1_Op0 2 +#define SYS_BRBINFINJ_EL1_Op1 1 +#define SYS_BRBINFINJ_EL1_CRn 9 +#define SYS_BRBINFINJ_EL1_CRm 1 +#define SYS_BRBINFINJ_EL1_Op2 0 + +#define BRBINFINJ_EL1_CCU GENMASK(46, 46) +#define BRBINFINJ_EL1_CCU_MASK GENMASK(46, 46) +#define BRBINFINJ_EL1_CCU_SHIFT 46 +#define BRBINFINJ_EL1_CCU_WIDTH 1 + +#define BRBINFINJ_EL1_CC GENMASK(45, 32) +#define BRBINFINJ_EL1_CC_MASK GENMASK(45, 32) +#define BRBINFINJ_EL1_CC_SHIFT 32 +#define BRBINFINJ_EL1_CC_WIDTH 14 + +#define BRBINFINJ_EL1_LASTFAILED GENMASK(17, 17) +#define BRBINFINJ_EL1_LASTFAILED_MASK GENMASK(17, 17) +#define BRBINFINJ_EL1_LASTFAILED_SHIFT 17 +#define BRBINFINJ_EL1_LASTFAILED_WIDTH 1 + +#define BRBINFINJ_EL1_T GENMASK(16, 16) +#define BRBINFINJ_EL1_T_MASK GENMASK(16, 16) +#define BRBINFINJ_EL1_T_SHIFT 16 +#define BRBINFINJ_EL1_T_WIDTH 1 + +#define BRBINFINJ_EL1_TYPE GENMASK(13, 8) +#define BRBINFINJ_EL1_TYPE_MASK GENMASK(13, 8) +#define BRBINFINJ_EL1_TYPE_SHIFT 8 +#define BRBINFINJ_EL1_TYPE_WIDTH 6 +#define BRBINFINJ_EL1_TYPE_UNCOND_DIRECT UL(0b000000) +#define BRBINFINJ_EL1_TYPE_INDIRECT UL(0b000001) +#define BRBINFINJ_EL1_TYPE_DIRECT_LINK UL(0b000010) 
+#define BRBINFINJ_EL1_TYPE_INDIRECT_LINK UL(0b000011) +#define BRBINFINJ_EL1_TYPE_RET UL(0b000101) +#define BRBINFINJ_EL1_TYPE_ERET UL(0b000111) +#define BRBINFINJ_EL1_TYPE_COND_DIRECT UL(0b001000) +#define BRBINFINJ_EL1_TYPE_DEBUG_HALT UL(0b100001) +#define BRBINFINJ_EL1_TYPE_CALL UL(0b100010) +#define BRBINFINJ_EL1_TYPE_TRAP UL(0b100011) +#define BRBINFINJ_EL1_TYPE_SERROR UL(0b100100) +#define BRBINFINJ_EL1_TYPE_INSN_DEBUG UL(0b100110) +#define BRBINFINJ_EL1_TYPE_DATA_DEBUG UL(0b100111) +#define BRBINFINJ_EL1_TYPE_ALIGN_FAULT UL(0b101010) +#define BRBINFINJ_EL1_TYPE_INSN_FAULT UL(0b101011) +#define BRBINFINJ_EL1_TYPE_DATA_FAULT UL(0b101100) +#define BRBINFINJ_EL1_TYPE_IRQ UL(0b101110) +#define BRBINFINJ_EL1_TYPE_FIQ UL(0b101111) +#define BRBINFINJ_EL1_TYPE_DEBUG_EXIT UL(0b111001) + +#define BRBINFINJ_EL1_EL GENMASK(7, 6) +#define BRBINFINJ_EL1_EL_MASK GENMASK(7, 6) +#define BRBINFINJ_EL1_EL_SHIFT 6 +#define BRBINFINJ_EL1_EL_WIDTH 2 +#define BRBINFINJ_EL1_EL_EL0 UL(0b00) +#define BRBINFINJ_EL1_EL_EL1 UL(0b01) +#define BRBINFINJ_EL1_EL_EL2 UL(0b10) +#define BRBINFINJ_EL1_EL_EL3 UL(0b11) + +#define BRBINFINJ_EL1_MPRED GENMASK(5, 5) +#define BRBINFINJ_EL1_MPRED_MASK GENMASK(5, 5) +#define BRBINFINJ_EL1_MPRED_SHIFT 5 +#define BRBINFINJ_EL1_MPRED_WIDTH 1 + +#define BRBINFINJ_EL1_VALID GENMASK(1, 0) +#define BRBINFINJ_EL1_VALID_MASK GENMASK(1, 0) +#define BRBINFINJ_EL1_VALID_SHIFT 0 +#define BRBINFINJ_EL1_VALID_WIDTH 2 +#define BRBINFINJ_EL1_VALID_NONE UL(0b00) +#define BRBINFINJ_EL1_VALID_TARGET UL(0b01) +#define BRBINFINJ_EL1_VALID_SOURCE UL(0b10) +#define BRBINFINJ_EL1_VALID_FULL UL(0b11) + +#define BRBINFINJ_EL1_RES0 (UL(0) | GENMASK_ULL(63, 47) | \ + GENMASK_ULL(31, 18) | \ + GENMASK_ULL(15, 14) | \ + GENMASK_ULL(4, 2)) +#define BRBINFINJ_EL1_RES1 (UL(0)) +#define BRBINFINJ_EL1_UNKN (UL(0)) + +#define REG_BRBSRCINJ_EL1 S2_1_C9_C1_1 +#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) +#define SYS_BRBSRCINJ_EL1_Op0 2 +#define SYS_BRBSRCINJ_EL1_Op1 1 +#define SYS_BRBSRCINJ_EL1_CRn 9 +#define SYS_BRBSRCINJ_EL1_CRm 1 +#define SYS_BRBSRCINJ_EL1_Op2 1 + +#define BRBSRCINJ_EL1_ADDRESS GENMASK(63, 0) +#define BRBSRCINJ_EL1_ADDRESS_MASK GENMASK(63, 0) +#define BRBSRCINJ_EL1_ADDRESS_SHIFT 0 +#define BRBSRCINJ_EL1_ADDRESS_WIDTH 64 + +#define BRBSRCINJ_EL1_RES0 (UL(0)) +#define BRBSRCINJ_EL1_RES1 (UL(0)) +#define BRBSRCINJ_EL1_UNKN (UL(0)) + +#define REG_BRBTGTINJ_EL1 S2_1_C9_C1_2 +#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) +#define SYS_BRBTGTINJ_EL1_Op0 2 +#define SYS_BRBTGTINJ_EL1_Op1 1 +#define SYS_BRBTGTINJ_EL1_CRn 9 +#define SYS_BRBTGTINJ_EL1_CRm 1 +#define SYS_BRBTGTINJ_EL1_Op2 2 + +#define BRBTGTINJ_EL1_ADDRESS GENMASK(63, 0) +#define BRBTGTINJ_EL1_ADDRESS_MASK GENMASK(63, 0) +#define BRBTGTINJ_EL1_ADDRESS_SHIFT 0 +#define BRBTGTINJ_EL1_ADDRESS_WIDTH 64 + +#define BRBTGTINJ_EL1_RES0 (UL(0)) +#define BRBTGTINJ_EL1_RES1 (UL(0)) +#define BRBTGTINJ_EL1_UNKN (UL(0)) + +#define REG_BRBIDR0_EL1 S2_1_C9_C2_0 +#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) +#define SYS_BRBIDR0_EL1_Op0 2 +#define SYS_BRBIDR0_EL1_Op1 1 +#define SYS_BRBIDR0_EL1_CRn 9 +#define SYS_BRBIDR0_EL1_CRm 2 +#define SYS_BRBIDR0_EL1_Op2 0 + +#define BRBIDR0_EL1_CC GENMASK(15, 12) +#define BRBIDR0_EL1_CC_MASK GENMASK(15, 12) +#define BRBIDR0_EL1_CC_SHIFT 12 +#define BRBIDR0_EL1_CC_WIDTH 4 +#define BRBIDR0_EL1_CC_20_BIT UL(0b101) + +#define BRBIDR0_EL1_FORMAT GENMASK(11, 8) +#define BRBIDR0_EL1_FORMAT_MASK GENMASK(11, 8) +#define BRBIDR0_EL1_FORMAT_SHIFT 8 +#define BRBIDR0_EL1_FORMAT_WIDTH 4 +#define BRBIDR0_EL1_FORMAT_0 UL(0b0) + 
+#define BRBIDR0_EL1_NUMREC GENMASK(7, 0) +#define BRBIDR0_EL1_NUMREC_MASK GENMASK(7, 0) +#define BRBIDR0_EL1_NUMREC_SHIFT 0 +#define BRBIDR0_EL1_NUMREC_WIDTH 8 +#define BRBIDR0_EL1_NUMREC_8 UL(0b0001000) +#define BRBIDR0_EL1_NUMREC_16 UL(0b0010000) +#define BRBIDR0_EL1_NUMREC_32 UL(0b0100000) +#define BRBIDR0_EL1_NUMREC_64 UL(0b1000000) + +#define BRBIDR0_EL1_RES0 (UL(0) | GENMASK_ULL(63, 16)) +#define BRBIDR0_EL1_RES1 (UL(0)) +#define BRBIDR0_EL1_UNKN (UL(0)) +/*** End of Branch Record Buffer Extension ***/ + #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) @@ -737,6 +1230,7 @@ #define ID_AA64MMFR3_EL1_TCRX_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_EL1_BRBE_SHIFT 52 #define ID_AA64DFR0_PMSVER_SHIFT 32 #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 @@ -745,6 +1239,9 @@ #define ID_AA64DFR0_TRACEVER_SHIFT 4 #define ID_AA64DFR0_DEBUGVER_SHIFT 0 +#define ID_AA64DFR0_EL1_BRBE_NI 0x0 +#define ID_AA64DFR0_EL1_BRBE_IMP 0x1 +#define ID_AA64DFR0_EL1_BRBE_BRBE_V1P1 0x2 #define ID_AA64DFR0_PMSVER_8_2 0x1 #define ID_AA64DFR0_PMSVER_8_3 0x2 -- Gitee From 194759c0c662f6dfd3d03ae53cb1dae271f72ebc Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 8 Nov 2023 10:38:38 +0800 Subject: [PATCH 010/173] drivers: perf: arm_pmu: Add new sched_task() callback This adds armpmu_sched_task(), as generic pmu's sched_task() override which in turn can utilize a new arm_pmu.sched_task() callback when available from the arm_pmu instance. This new callback will be used while enabling BRBE in ARMV8 PMUV3. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Signed-off-by: huwentao --- drivers/perf/arm_pmu.c | 9 +++++++++ include/linux/perf/arm_pmu.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 4a9fc2268f19..18e28088db4e 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -518,6 +518,14 @@ static int armpmu_event_init(struct perf_event *event) return __hw_perf_event_init(event); } +static void armpmu_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu); + + if (armpmu->sched_task) + armpmu->sched_task(ctx, sched_in); +} + static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); @@ -911,6 +919,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags) } pmu->pmu = (struct pmu) { + .sched_task = armpmu_sched_task, .pmu_enable = armpmu_enable, .pmu_disable = armpmu_disable, .event_init = armpmu_event_init, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 8281ac531c42..fcaf1c99fab7 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -95,6 +95,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + void (*sched_task)(struct perf_event_context *ctx, bool sched_in); int (*filter_match)(struct perf_event *event); int num_events; bool secure_access; /* 32-bit ARM only */ -- Gitee From e9354046201a22f1d2f275c1b10f80ebdf5724c4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:54 -0700 Subject: [PATCH 011/173] perf: Add sample_flags to indicate the PMU-filled sample data [Upstream commit 3aac580d5cc3001ca1627725b3b61edb529f341d] On some platforms, some data e.g., timestamps, can be retrieved from the PMU driver. 
Usually, the data from the PMU driver is more accurate. The current perf kernel should output the PMU-filled sample data if it's available. To check the availability of the PMU-filled sample data, the current perf kernel initializes the related fields in the perf_sample_data_init(). When outputting a sample, the perf checks whether the field is updated by the PMU driver. If yes, the updated value will be output. If not, the perf uses an SW way to calculate the value or just outputs the initialized value if an SW way is unavailable either. With more and more data being provided by the PMU driver, more fields has to be initialized in the perf_sample_data_init(). That will increase the number of cache lines touched in perf_sample_data_init() and be harmful to the performance. Add new "sample_flags" to indicate the PMU-filled sample data. The PMU driver should set the corresponding PERF_SAMPLE_ flag when the field is updated. The initialization of the corresponding field is not required anymore. The following patches will make use of it and remove the corresponding fields from the perf_sample_data_init(), which will further minimize the number of cache lines touched. Only clear the sample flags that have already been done by the PMU driver in the perf_prepare_sample() for the PERF_RECORD_SAMPLE. For the other PERF_RECORD_ event type, the sample data is not available. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-2-kan.liang@linux.intel.com Signed-off-by: Xie Haocheng Signed-off-by: huwentao --- include/linux/perf_event.h | 2 ++ kernel/events/core.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e6173d37803d..6ff1f8f6a04f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -993,6 +993,7 @@ struct perf_sample_data { * Fields set by perf_sample_data_init(), group so as to * minimize the cachelines touched. 
*/ + u64 sample_flags; u64 addr; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; @@ -1048,6 +1049,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { /* remaining struct members initialized in perf_prepare_sample() */ + data->sample_flags = 0; data->addr = addr; data->raw = NULL; data->br_stack = NULL; diff --git a/kernel/events/core.c b/kernel/events/core.c index a5d71b20f67d..65016339b499 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6396,11 +6396,10 @@ static void perf_aux_sample_output(struct perf_event *event, static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, - struct perf_event *event) + struct perf_event *event, + u64 sample_type) { - u64 sample_type = event->attr.sample_type; - - data->type = sample_type; + data->type = event->attr.sample_type; header->size += event->id_header_size; if (sample_type & PERF_SAMPLE_TID) { @@ -6429,7 +6428,7 @@ void perf_event_header__init_id(struct perf_event_header *header, struct perf_event *event) { if (event->attr.sample_id_all) - __perf_event_header__init_id(header, data, event); + __perf_event_header__init_id(header, data, event, event->attr.sample_type); } static void __perf_event__output_id_sample(struct perf_output_handle *handle, @@ -6895,6 +6894,7 @@ void perf_prepare_sample(struct perf_event_header *header, struct pt_regs *regs) { u64 sample_type = event->attr.sample_type; + u64 filtered_sample_type; header->type = PERF_RECORD_SAMPLE; header->size = sizeof(*header) + event->header_size; @@ -6902,7 +6902,12 @@ void perf_prepare_sample(struct perf_event_header *header, header->misc = 0; header->misc |= perf_misc_flags(regs); - __perf_event_header__init_id(header, data, event); + /* + * Clear the sample flags that have already been done by the + * PMU driver. + */ + filtered_sample_type = sample_type & ~data->sample_flags; + __perf_event_header__init_id(header, data, event, filtered_sample_type); if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE)) data->ip = perf_instruction_pointer(regs); -- Gitee From 37e36588fba03558d846a93a36a0e6159505d791 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 8 Nov 2023 10:38:39 +0800 Subject: [PATCH 012/173] drivers: perf: arm_pmuv3: Enable branch stack sampling framework Branch stack sampling support i.e capturing branch records during execution in core perf, rides along with normal HW events being scheduled on the PMU. This prepares ARMV8 PMU framework for branch stack support on relevant PMUs with required HW implementation. ARMV8 PMU hardware support for branch stack sampling is indicated via a new feature flag called 'has_branch_stack' that can be ascertained via probing. This modifies current gate in armpmu_event_init() which blocks branch stack sampling based perf events unconditionally. Instead allows such perf events getting initialized on supporting PMU hardware. Branch stack sampling is enabled and disabled along with regular PMU events the relevant hardware also needs to be driven in tandem. This adds required function callbacks in armv8pmu_branch_xxx() format, to drive the PMU branch stack hardware when supported. This also adds fallback stub definitions for these callbacks for PMUs which would not have required support. Finally this adds a new buffer i.e 'struct branch_records', which can hold captured branch records during PMU IRQ processing before being passed on to the perf ring buffer. 
These buffers are per cpu, and dynamically allocated only for supporting ARMV8 PMU. These buffers can hold 'MAX_BRANCH_RECORDS' branch record entries. This enables PERF_ATTACH_TASK_DATA for branch stack sampling perf events to make them hold context branch records in their task_ctx_data. This will get used to stash branch records that would have been lost when a given process schedules out after a short run on the CPU without an event overflow. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: huwentao --- arch/arm64/include/asm/perf_event.h | 43 ++++++++++++++ arch/arm64/kernel/perf_event.c | 91 ++++++++++++++++++++++++++++- drivers/perf/arm_pmu.c | 19 +++++- include/linux/perf/arm_pmu.h | 22 ++++++- 4 files changed, 171 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2bdbc79bbd01..7749926dfe8d 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -208,12 +208,55 @@ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +struct pmu_hw_events; +struct arm_pmu; +struct perf_event; + #ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs + +static inline void armv8pmu_branch_reset(void) +{ +} + +static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) +{ +} + +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + return false; +} + +static inline void armv8pmu_branch_enable(struct perf_event *event) +{ +} + +static inline void armv8pmu_branch_disable(struct perf_event *event) +{ +} + +static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ +} + +static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ +} + +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) +{ + return 0; +} + +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) +{ +} #endif #define perf_arch_fetch_caller_regs(regs, __ip) { \ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 37d4f9e6ae5c..c808e346c14b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -695,10 +695,16 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter */ armv8pmu_enable_event_counter(event); + + if (has_branch_stack(event)) + armv8pmu_branch_enable(event); } static void armv8pmu_disable_event(struct perf_event *event) { + if (has_branch_stack(event)) + armv8pmu_branch_disable(event); + /* * Disable counter */ @@ -772,6 +778,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * PMU IRQ should remain asserted until all branch records + * are captured and processed into struct perf_sample_data. 
+ */ + if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) { + armv8pmu_branch_read(cpuc, event); + data.br_stack = &cpuc->branches->branch_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } + /* * Perf event overflow will queue the processing of the event as * an irq_work which will be taken care of in the handling of @@ -850,6 +866,24 @@ static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, clear_bit(idx - 1, cpuc->used_mask); } +static void armv8pmu_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(ctx->pmu); + void *task_ctx = ctx ? ctx->task_ctx_data : NULL; + + if (armpmu->has_branch_stack) { + /* Save branch records in task_ctx on sched out */ + if (task_ctx && !sched_in) { + armv8pmu_branch_save(armpmu, task_ctx); + return; + } + + /* Reset branch records on sched in */ + if (sched_in) + armv8pmu_branch_reset(); + } +} + /* * Add an event filter to a given event. */ @@ -923,6 +957,9 @@ static void armv8pmu_reset(void *info) */ armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC); + + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_reset(); } static int __armv8_pmuv3_map_event(struct perf_event *event, @@ -940,6 +977,12 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + if (has_branch_stack(event)) { + event->attach_state |= PERF_ATTACH_TASK_DATA; + if (!armv8pmu_branch_attr_valid(event)) + return -EOPNOTSUPP; + } + if (armv8pmu_event_is_64bit(event)) event->hw.flags |= ARMPMU_EVT_64BIT; @@ -1025,6 +1068,36 @@ static void __armv8pmu_probe_pmu(void *info) bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap, pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); + if (is_kernel_in_hyp_mode()) + armv8pmu_branch_probe(cpu_pmu); +} + +static int branch_records_alloc(struct arm_pmu *armpmu) +{ + struct branch_records __percpu *records; + int cpu; + + records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL); + if (!records) + return -ENOMEM; + + /* + * percpu memory allocated for 'records' gets completely consumed + * here, and never required to be freed up later. So permanently + * losing access to this anchor i.e 'records' is acceptable. + * + * Otherwise this allocation handle would have to be saved up for + * free_percpu() release later if required. + */ + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events_cpu; + struct branch_records *records_cpu; + + events_cpu = per_cpu_ptr(armpmu->hw_events, cpu); + records_cpu = per_cpu_ptr(records, cpu); + events_cpu->branches = records_cpu; + } + return 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1041,7 +1114,21 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) if (ret) return ret; - return probe.present ? 
0 : -ENODEV; + if (!probe.present) + return -ENODEV; + + if (cpu_pmu->has_branch_stack) { + ret = armv8pmu_task_ctx_cache_alloc(cpu_pmu); + if (ret) + return ret; + + ret = branch_records_alloc(cpu_pmu); + if (ret) { + armv8pmu_task_ctx_cache_free(cpu_pmu); + return ret; + } + } + return 0; } static int armv8_pmu_init(struct arm_pmu *cpu_pmu) @@ -1062,6 +1149,8 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->reset = armv8pmu_reset; cpu_pmu->set_event_filter = armv8pmu_set_event_filter; cpu_pmu->filter_match = armv8pmu_filter_match; + cpu_pmu->sched_task = armv8pmu_sched_task; + cpu_pmu->branch_reset = armv8pmu_branch_reset; return 0; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 18e28088db4e..72d34c8bff05 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -313,6 +313,11 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + WARN_ON_ONCE(!hw_events->brbe_users); + hw_events->brbe_users--; + if (!hw_events->brbe_users) + hw_events->brbe_context = NULL; + armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; armpmu->clear_event_idx(hw_events, event); @@ -329,6 +334,13 @@ armpmu_add(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx; + if (event->ctx->task && hw_events->brbe_context != event->ctx) { + hw_events->brbe_context = event->ctx; + if (armpmu->branch_reset) + armpmu->branch_reset(); + } + hw_events->brbe_users++; + /* An event following a process won't be stopped earlier */ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) return -ENOENT; @@ -508,8 +520,11 @@ static int armpmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) return -ENOENT; - /* does not support taken branch sampling */ - if (has_branch_stack(event)) + /* + * Branch stack sampling events are allowed + * only on PMU which has required support. + */ + if (has_branch_stack(event) && !armpmu->has_branch_stack) return -EOPNOTSUPP; if (armpmu->map_event(event) == -ENOENT) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index fcaf1c99fab7..6cf61cef771a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -41,6 +41,18 @@ }, \ } +/* + * Maximum branch record entries which could be processed + * for core perf branch stack sampling support, regardless + * of the hardware support available on a given ARM PMU. + */ +#define MAX_BRANCH_RECORDS 64 + +struct branch_records { + struct perf_branch_stack branch_stack; + struct perf_branch_entry branch_entries[MAX_BRANCH_RECORDS]; +}; + /* The events for a given PMU register set. 
*/ struct pmu_hw_events { /* @@ -67,6 +79,11 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; int irq; + + struct branch_records *branches; + void *brbe_context; + unsigned int brbe_users; + unsigned long brbe_sample_type; }; enum armpmu_attr_groups { @@ -96,9 +113,12 @@ struct arm_pmu { void (*reset)(void *); int (*map_event)(struct perf_event *event); void (*sched_task)(struct perf_event_context *ctx, bool sched_in); + void (*branch_reset)(void); int (*filter_match)(struct perf_event *event); int num_events; - bool secure_access; /* 32-bit ARM only */ + unsigned int secure_access:1, /* 32-bit ARM only */ + has_branch_stack:1, /* 64-bit ARM only */ + reserved:30; #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); #define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 -- Gitee From 64ad3f6643d176266cdd9415db0944c1f6f24165 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 8 Nov 2023 10:38:40 +0800 Subject: [PATCH 013/173] drivers: perf: arm_pmuv3: Enable branch stack sampling via FEAT_BRBE This extends recently added branch stack sampling framework in ARMV8 PMU to enable such events via new architecture feature called Branch Record Buffer Extension aka BRBE. This implements all the armv8pmu_branch_xxx() callbacks as expected at ARMV8 PMU level required to drive perf branch stack sampling events. This adds a new config option CONFIG_ARM64_BRBE to encapsulate this BRBE based implementation, available only on ARM64 platforms. BRBE hardware captures a branch record via three distinct system registers representing branch source address, branch target address, and other branch information. A BRBE buffer implementation is organized as multiple banks of 32 branch records each, which is a collection of BRBSRC_EL1, BRBTGT_EL1 and BRBINF_EL1 registers. Though total BRBE record entries i.e BRBE_MAX_ENTRIES cannot exceed MAX_BRANCH_RECORDS as defined for ARM PMU. BRBE hardware attributes get captured in a new reg_brbidr element in struct arm_pmu during armv8pmu_branch_probe() which is called from broader probing function __armv8pmu_probe_pmu(). Attributes such as number of branch record entries implemented in the hardware can be derived from armpmu->reg_brbidr. BRBE gets enabled via armv8pmu_branch_enable() where it also derives branch filter, and additional requirements from event's 'attr.branch_sample_type' and configures them via BRBFCR_EL1 and BRBCR_EL1 registers. PMU event overflow triggers IRQ, where current branch records get captured, stitched along with older records available in 'task_ctx', before getting processed for core perf ring buffer. Task context switch outs incrementally save current branch records in event's 'pmu_ctx->task_ctx_data' to optimize workload's branch record samples. In case multiple events with different branch sample type requests converge on the same PMU, BRBE gets enabled for branch filters for the last event's branch sample type. No branch records will be captured and processed for an event if BRBE hardware config does not match its branch sample type, while handling the PMU IRQ. 
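The branch filter derivation mentioned above boils down to translating
event->attr.branch_sample_type into BRBFCR_EL1 filter bits. A sketch of
that translation follows; the helper name is illustrative, while the
PERF_SAMPLE_BRANCH_* and BRBFCR_EL1_* definitions come from earlier in
this series:

  #include <linux/perf_event.h>
  #include <asm/sysreg.h>

  static u64 branch_type_to_brbfcr(u64 branch_type)
  {
          u64 brbfcr = 0;

          /* PERF_SAMPLE_BRANCH_ANY subsumes every individual filter */
          if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
                  brbfcr |= BRBFCR_EL1_DIRECT | BRBFCR_EL1_INDIRECT |
                            BRBFCR_EL1_RTN | BRBFCR_EL1_INDCALL |
                            BRBFCR_EL1_DIRCALL | BRBFCR_EL1_CONDDIR;
                  return brbfcr;
          }

          if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                  brbfcr |= BRBFCR_EL1_DIRCALL | BRBFCR_EL1_INDCALL;
          if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                  brbfcr |= BRBFCR_EL1_RTN;
          if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
                  brbfcr |= BRBFCR_EL1_INDCALL;
          if (branch_type & PERF_SAMPLE_BRANCH_COND)
                  brbfcr |= BRBFCR_EL1_CONDDIR;
          if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                  brbfcr |= BRBFCR_EL1_INDIRECT;
          if (branch_type & PERF_SAMPLE_BRANCH_CALL)
                  brbfcr |= BRBFCR_EL1_DIRCALL;

          return brbfcr;
  }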
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Junhao He Signed-off-by: huwentao --- arch/arm64/include/asm/perf_event.h | 13 + arch/arm64/include/asm/sysreg.h | 8 + arch/arm64/kernel/head.S | 48 ++ drivers/perf/Kconfig | 11 + drivers/perf/Makefile | 1 + drivers/perf/arm_brbe.c | 779 ++++++++++++++++++++++++++++ drivers/perf/arm_brbe.h | 258 +++++++++ drivers/perf/arm_pmu.c | 6 + include/linux/perf/arm_pmu.h | 5 + 9 files changed, 1129 insertions(+) create mode 100644 drivers/perf/arm_brbe.c create mode 100644 drivers/perf/arm_brbe.h diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 7749926dfe8d..a9932ffaaa36 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -219,6 +219,18 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs +#ifdef CONFIG_ARM64_BRBE +void armv8pmu_branch_reset(void); +void armv8pmu_branch_probe(struct arm_pmu *arm_pmu); +bool armv8pmu_branch_attr_valid(struct perf_event *event); +void armv8pmu_branch_enable(struct perf_event *event); +void armv8pmu_branch_disable(struct perf_event *event); +void armv8pmu_branch_read(struct pmu_hw_events *cpuc, + struct perf_event *event); +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx); +int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu); +void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu); +#else /* !CONFIG_ARM64_BRBE */ static inline void armv8pmu_branch_reset(void) { } @@ -257,6 +269,7 @@ static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { } +#endif /* CONFIG_ARM64_BRBE */ #endif #define perf_arch_fetch_caller_regs(regs, __ip) { \ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index ba551850daa4..af35eab7b6e2 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -104,6 +104,14 @@ #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +#define SYS_BRB_IALL sys_insn(1, 1, 7, 2, 4) +#define SYS_BRB_INJ sys_insn(1, 1, 7, 2, 5) + +/* + * BRBE Instructions + */ +#define BRB_IALL_INSN __emit_inst(0xd5000000 | SYS_BRB_IALL | (0x1f)) +#define BRB_INJ_INSN __emit_inst(0xd5000000 | SYS_BRB_INJ | (0x1f)) #define SYS_ALLINT_CLR sys_reg(0, 1, 4, 0, 0) #define SYS_ALLINT_SET sys_reg(0, 1, 4, 1, 0) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4558f6d94dab..dfbbd20afd01 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -475,6 +475,51 @@ ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET EXPORT_SYMBOL(kimage_vaddr) +#ifdef CONFIG_ARM64_BRBE +/* + * Enable BRBE cycle count + * + * BRBE requires both BRBCR_EL1.CC and BRBCR_EL2.CC fields, be set + * for the cycle counts to be available in BRBINF_EL1.CC during + * branch record processing after a PMU interrupt. This enables CC + * field on both these registers while still executing inside EL2. + * + * BRBE driver would still be able to toggle branch records cycle + * count support via BRBCR_EL1.CC field regardless of whether the + * kernel ends up executing in EL1 or EL2. 
+ */ +.macro __init_el2_brbe + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_cc_\@ + + mrs_s x0, SYS_BRBCR_EL2 + orr x0, x0, BRBCR_ELx_CC + msr_s SYS_BRBCR_EL2, x0 + + /* + * Accessing BRBCR_EL1 register here does not require + * BRBCR_EL12 addressing mode as HCR_EL2.E2H is still + * clear. Regardless, check for HCR_E2H and be on the + * safer side. + */ + mrs x1, hcr_el2 + and x1, x1, #HCR_E2H + cbz x1, .Lset_brbe_el1_direct_\@ + + mrs_s x0, SYS_BRBCR_EL12 + orr x0, x0, BRBCR_ELx_CC + msr_s SYS_BRBCR_EL12, x0 + b .Lskip_brbe_cc_\@ + +.Lset_brbe_el1_direct_\@: + mrs_s x0, SYS_BRBCR_EL1 + orr x0, x0, BRBCR_ELx_CC + msr_s SYS_BRBCR_EL1, x0 +.Lskip_brbe_cc_\@: +.endm + +#endif /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. @@ -603,6 +648,9 @@ set_hcr: 7: msr mdcr_el2, x3 // Configure debug traps +#ifdef CONFIG_ARM64_BRBE + __init_el2_brbe +#endif /* LORegions */ mrs x1, id_aa64mmfr1_el1 ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index c49c3e1042c2..ac01b8887189 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -129,6 +129,17 @@ config ARM_SPE_PMU Extension, which provides periodic sampling of operations in the CPU pipeline and reports this via the perf AUX interface. +config ARM64_BRBE + bool "Enable support for Branch Record Buffer Extension (BRBE)" + depends on PERF_EVENTS && ARM64 && ARM_PMU + default y + help + Enable perf support for Branch Record Buffer Extension (BRBE) which + records all branches taken in an execution path. This supports some + branch types and privilege based filtering. It captures additional + relevant information such as cycle count, misprediction and branch + type, branch privilege level etc. + source "drivers/perf/hisilicon/Kconfig" endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 13db5be35156..2fb4c45b5249 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o +obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c new file mode 100644 index 000000000000..1ffa41e355cf --- /dev/null +++ b/drivers/perf/arm_brbe.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Branch Record Buffer Extension Driver. 
+ * + * Copyright (C) 2022-2023 ARM Limited + * + * Author: Anshuman Khandual + */ +#include "arm_brbe.h" + +void armv8pmu_branch_reset(void) +{ + asm volatile(BRB_IALL_INSN); + isb(); +} + +static bool valid_brbe_nr(int brbe_nr) +{ + return brbe_nr == BRBIDR0_EL1_NUMREC_8 || + brbe_nr == BRBIDR0_EL1_NUMREC_16 || + brbe_nr == BRBIDR0_EL1_NUMREC_32 || + brbe_nr == BRBIDR0_EL1_NUMREC_64; +} + +static bool valid_brbe_cc(int brbe_cc) +{ + return brbe_cc == BRBIDR0_EL1_CC_20_BIT; +} + +static bool valid_brbe_format(int brbe_format) +{ + return brbe_format == BRBIDR0_EL1_FORMAT_0; +} + +static bool valid_brbe_version(int brbe_version) +{ + return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP || + brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1; +} + +static void select_brbe_bank(int bank) +{ + u64 brbfcr; + + WARN_ON(bank > BRBE_BANK_IDX_1); + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_BANK_MASK; + brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); +} + +static bool __read_brbe_regset(struct brbe_regset *entry, int idx) +{ + entry->brbinf = get_brbinf_reg(idx); + + if (brbe_invalid(entry->brbinf)) + return false; + + entry->brbsrc = get_brbsrc_reg(idx); + entry->brbtgt = get_brbtgt_reg(idx); + return true; +} + +/* + * Read all BRBE entries in HW until the first invalid entry. + * + * The caller must ensure that the BRBE is not concurrently modifying these + * branch entries. + */ +static int capture_brbe_regset(struct brbe_regset *buf, int nr_hw_entries) +{ + int idx = 0; + + select_brbe_bank(BRBE_BANK_IDX_0); + while (idx < nr_hw_entries && idx <= BRBE_BANK0_IDX_MAX) { + if (!__read_brbe_regset(&buf[idx], idx)) + return idx; + idx++; + } + + select_brbe_bank(BRBE_BANK_IDX_1); + while (idx < nr_hw_entries && idx <= BRBE_BANK1_IDX_MAX) { + if (!__read_brbe_regset(&buf[idx], idx)) + return idx; + idx++; + } + return idx; +} + +/* + * This function concatenates branch records from stored and live buffer + * up to maximum nr_max records and the stored buffer holds the resultant + * buffer. The concatenated buffer contains all the branch records from + * the live buffer but might contain some from stored buffer considering + * the maximum combined length does not exceed 'nr_max'. + * + * Stored records Live records + * ------------------------------------------------^ + * | S0 | L0 | Newest | + * --------------------------------- | + * | S1 | L1 | | + * --------------------------------- | + * | S2 | L2 | | + * --------------------------------- | + * | S3 | L3 | | + * --------------------------------- | + * | S4 | L4 | nr_max + * --------------------------------- | + * | | L5 | | + * --------------------------------- | + * | | L6 | | + * --------------------------------- | + * | | L7 | | + * --------------------------------- | + * | | | | + * --------------------------------- | + * | | | Oldest | + * ------------------------------------------------V + * + * + * S0 is the newest in the stored records, where as L7 is the oldest in + * the live records. Unless the live buffer is detected as being full + * thus potentially dropping off some older records, L7 and S0 records + * are contiguous in time for a user task context. The stitched buffer + * here represents maximum possible branch records, contiguous in time. 
+ * + * Stored records Live records + * ------------------------------------------------^ + * | L0 | L0 | Newest | + * --------------------------------- | + * | L0 | L1 | | + * --------------------------------- | + * | L2 | L2 | | + * --------------------------------- | + * | L3 | L3 | | + * --------------------------------- | + * | L4 | L4 | nr_max + * --------------------------------- | + * | L5 | L5 | | + * --------------------------------- | + * | L6 | L6 | | + * --------------------------------- | + * | L7 | L7 | | + * --------------------------------- | + * | S0 | | | + * --------------------------------- | + * | S1 | | Oldest | + * ------------------------------------------------V + * | S2 | <----| + * ----------------- | + * | S3 | <----| Dropped off after nr_max + * ----------------- | + * | S4 | <----| + * ----------------- + */ +static int stitch_stored_live_entries(struct brbe_regset *stored, + struct brbe_regset *live, + int nr_stored, int nr_live, + int nr_max) +{ + int nr_move = min(nr_stored, nr_max - nr_live); + + /* Move the tail of the buffer to make room for the new entries */ + memmove(&stored[nr_live], &stored[0], nr_move * sizeof(*stored)); + + /* Copy the new entries into the head of the buffer */ + memcpy(&stored[0], &live[0], nr_live * sizeof(*stored)); + + /* Return the number of entries in the stitched buffer */ + return min(nr_live + nr_stored, nr_max); +} + +static int brbe_branch_save(struct brbe_regset *live, int nr_hw_entries) +{ + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + int nr_live; + + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + nr_live = capture_brbe_regset(live, nr_hw_entries); + + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + return nr_live; +} + +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ + struct arm64_perf_task_context *task_ctx = ctx; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + + nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr); + nr_live = brbe_branch_save(live, nr_hw_entries); + nr_store = task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + task_ctx->nr_brbe_records = nr_store; +} + +/* + * Generic perf branch filters supported on BRBE + * + * New branch filters need to be evaluated whether they could be supported on + * BRBE. This ensures that such branch filters would not just be accepted, to + * fail silently. PERF_SAMPLE_BRANCH_HV is a special case that is selectively + * supported only on platforms where kernel is in hyp mode. 
+ */ +#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \ + PERF_SAMPLE_BRANCH_IN_TX | \ + PERF_SAMPLE_BRANCH_NO_TX | \ + PERF_SAMPLE_BRANCH_CALL_STACK) + +#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \ + PERF_SAMPLE_BRANCH_KERNEL | \ + PERF_SAMPLE_BRANCH_HV | \ + PERF_SAMPLE_BRANCH_ANY | \ + PERF_SAMPLE_BRANCH_ANY_CALL | \ + PERF_SAMPLE_BRANCH_ANY_RETURN | \ + PERF_SAMPLE_BRANCH_IND_CALL | \ + PERF_SAMPLE_BRANCH_COND | \ + PERF_SAMPLE_BRANCH_IND_JUMP | \ + PERF_SAMPLE_BRANCH_CALL | \ + PERF_SAMPLE_BRANCH_NO_FLAGS | \ + PERF_SAMPLE_BRANCH_NO_CYCLES | \ + PERF_SAMPLE_BRANCH_TYPE_SAVE | \ + PERF_SAMPLE_BRANCH_HW_INDEX | \ + PERF_SAMPLE_BRANCH_PRIV_SAVE) + +#define BRBE_PERF_BRANCH_FILTERS (BRBE_ALLOWED_BRANCH_FILTERS | \ + BRBE_EXCLUDE_BRANCH_FILTERS) + +bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + u64 branch_type = event->attr.branch_sample_type; + + /* + * Ensure both perf branch filter allowed and exclude + * masks are always in sync with the generic perf ABI. + */ + BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1)); + + if (branch_type & ~BRBE_ALLOWED_BRANCH_FILTERS) { + pr_debug_once("requested branch filter not supported 0x%llx\n", branch_type); + return false; + } + + /* + * If the event does not have at least one of the privilege + * branch filters as in PERF_SAMPLE_BRANCH_PLM_ALL, the core + * perf will adjust its value based on perf event's existing + * privilege level via attr.exclude_[user|kernel|hv]. + * + * As event->attr.branch_sample_type might have been changed + * when the event reaches here, it is not possible to figure + * out whether the event originally had HV privilege request + * or got added via the core perf. Just report this situation + * once and continue ignoring if there are other instances. + */ + if ((branch_type & PERF_SAMPLE_BRANCH_HV) && !is_kernel_in_hyp_mode()) + pr_debug_once("hypervisor privilege filter not supported 0x%llx\n", branch_type); + + return true; +} + +int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) +{ + size_t size = sizeof(struct arm64_perf_task_context); + + arm_pmu->pmu.task_ctx_cache = kmem_cache_create("arm64_brbe_task_ctx", size, 0, 0, NULL); + if (!arm_pmu->pmu.task_ctx_cache) + return -ENOMEM; + return 0; +} + +void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) +{ + kmem_cache_destroy(arm_pmu->pmu.task_ctx_cache); +} + +static int brbe_attributes_probe(struct arm_pmu *armpmu, u32 brbe) +{ + u64 brbidr = read_sysreg_s(SYS_BRBIDR0_EL1); + int brbe_version, brbe_format, brbe_cc, brbe_nr; + + brbe_version = brbe; + brbe_format = brbe_get_format(brbidr); + brbe_cc = brbe_get_cc_bits(brbidr); + brbe_nr = brbe_get_numrec(brbidr); + armpmu->reg_brbidr = brbidr; + + if (!valid_brbe_version(brbe_version) || + !valid_brbe_format(brbe_format) || + !valid_brbe_cc(brbe_cc) || + !valid_brbe_nr(brbe_nr)) + return -EOPNOTSUPP; + return 0; +} + +void armv8pmu_branch_probe(struct arm_pmu *armpmu) +{ + u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1); + u32 brbe; + + /* + * BRBE implementation's branch entries cannot exceed maximum + * branch records supported at the ARM PMU level abstraction. + * Otherwise there is always a possibility of array overflow, + * while processing BRBE branch records. 
+ */ + BUILD_BUG_ON(BRBE_BANK_MAX_ENTRIES > MAX_BRANCH_RECORDS); + + brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT); + if (!brbe) + return; + + if (brbe_attributes_probe(armpmu, brbe)) + return; + + armpmu->has_branch_stack = 1; +} + +/* + * BRBE supports the following functional branch type filters while + * generating branch records. These branch filters can be enabled, + * either individually or as a group i.e ORing multiple filters + * with each other. + * + * BRBFCR_EL1_CONDDIR - Conditional direct branch + * BRBFCR_EL1_DIRCALL - Direct call + * BRBFCR_EL1_INDCALL - Indirect call + * BRBFCR_EL1_INDIRECT - Indirect branch + * BRBFCR_EL1_DIRECT - Direct branch + * BRBFCR_EL1_RTN - Subroutine return + */ +static u64 branch_type_to_brbfcr(int branch_type) +{ + u64 brbfcr = 0; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbfcr |= BRBFCR_EL1_BRANCH_FILTERS; + return brbfcr; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + brbfcr |= BRBFCR_EL1_INDCALL; + brbfcr |= BRBFCR_EL1_DIRCALL; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbfcr |= BRBFCR_EL1_RTN; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL) + brbfcr |= BRBFCR_EL1_INDCALL; + + if (branch_type & PERF_SAMPLE_BRANCH_COND) + brbfcr |= BRBFCR_EL1_CONDDIR; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) + brbfcr |= BRBFCR_EL1_INDIRECT; + + if (branch_type & PERF_SAMPLE_BRANCH_CALL) + brbfcr |= BRBFCR_EL1_DIRCALL; + + return brbfcr; +} + +/* + * BRBE supports the following privilege mode filters while generating + * branch records. + * + * BRBCR_ELx_E0BRE - EL0 branch records + * BRBCR_ELx_ExBRE - EL1/EL2 branch records + * + * BRBE also supports the following additional functional branch type + * filters while generating branch records. + * + * BRBCR_ELx_EXCEPTION - Exception + * BRBCR_ELx_ERTN - Exception return + */ +static u64 branch_type_to_brbcr(int branch_type) +{ + u64 brbcr = BRBCR_ELx_DEFAULT_TS; + + /* + * BRBE should be paused on PMU interrupt while tracing kernel + * space to stop capturing further branch records. Otherwise + * interrupt handler branch records might get into the samples + * which is not desired. + * + * BRBE need not be paused on PMU interrupt while tracing only + * the user space, because it will automatically be inside the + * prohibited region. But even after PMU overflow occurs, the + * interrupt could still take much more cycles, before it can + * be taken and by that time BRBE will have been overwritten. + * Hence enable pause on PMU interrupt mechanism even for user + * only traces as well. + */ + brbcr |= BRBCR_ELx_FZP; + + if (branch_type & PERF_SAMPLE_BRANCH_USER) + brbcr |= BRBCR_ELx_E0BRE; + + /* + * When running in the hyp mode, writing into BRBCR_EL1 + * actually writes into BRBCR_EL2 instead. Field E2BRE + * is also at the same position as E1BRE. + */ + if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) + brbcr |= BRBCR_ELx_ExBRE; + + if (branch_type & PERF_SAMPLE_BRANCH_HV) { + if (is_kernel_in_hyp_mode()) + brbcr |= BRBCR_ELx_ExBRE; + } + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)) + brbcr |= BRBCR_ELx_CC; + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)) + brbcr |= BRBCR_ELx_MPRED; + + /* + * The exception and exception return branches could be + * captured, irrespective of the perf event's privilege. 
+ * If the perf event does not have enough privilege for + * a given exception level, then addresses which falls + * under that exception level will be reported as zero + * for the captured branch record, creating source only + * or target only records. + */ + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbcr |= BRBCR_ELx_EXCEPTION; + brbcr |= BRBCR_ELx_ERTN; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) + brbcr |= BRBCR_ELx_EXCEPTION; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbcr |= BRBCR_ELx_ERTN; + + return brbcr & BRBCR_ELx_CONFIG_MASK; +} + +void armv8pmu_branch_enable(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + u64 brbfcr, brbcr; + + if (!cpuc->brbe_users) + return; + + /* + * Skip enabling BRBE again with same filters and configs + */ + if (cpuc->brbe_sample_type == event->attr.branch_sample_type) + return; + + /* + * BRBE gets configured with a new mismatched branch sample + * type request, overriding any previous branch filters. + */ + cpuc->brbe_sample_type = event->attr.branch_sample_type; + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_DEFAULT_CONFIG; + brbfcr |= branch_type_to_brbfcr(cpuc->brbe_sample_type); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); + + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbcr &= ~BRBCR_ELx_CONFIG_MASK; + brbcr |= branch_type_to_brbcr(cpuc->brbe_sample_type); + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + isb(); +} + +void armv8pmu_branch_disable(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + u64 brbfcr, brbcr; + + if (cpuc->brbe_users) + return; + + cpuc->brbe_sample_type = 0; + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbcr &= ~(BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE); + brbfcr |= BRBFCR_EL1_PAUSED; + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); +} + +static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf) +{ + int brbe_type = brbe_get_type(brbinf); + + switch (brbe_type) { + case BRBINFx_EL1_TYPE_UNCOND_DIRECT: + entry->type = PERF_BR_UNCOND; + break; + case BRBINFx_EL1_TYPE_INDIRECT: + entry->type = PERF_BR_IND; + break; + case BRBINFx_EL1_TYPE_DIRECT_LINK: + entry->type = PERF_BR_CALL; + break; + case BRBINFx_EL1_TYPE_INDIRECT_LINK: + entry->type = PERF_BR_IND_CALL; + break; + case BRBINFx_EL1_TYPE_RET: + entry->type = PERF_BR_RET; + break; + case BRBINFx_EL1_TYPE_COND_DIRECT: + entry->type = PERF_BR_COND; + break; + case BRBINFx_EL1_TYPE_CALL: + entry->type = PERF_BR_CALL; + break; + case BRBINFx_EL1_TYPE_TRAP: + entry->type = PERF_BR_SYSCALL; + break; + case BRBINFx_EL1_TYPE_ERET: + entry->type = PERF_BR_ERET; + break; + case BRBINFx_EL1_TYPE_IRQ: + entry->type = PERF_BR_IRQ; + break; + case BRBINFx_EL1_TYPE_DEBUG_HALT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_HALT; + break; + case BRBINFx_EL1_TYPE_SERROR: + entry->type = PERF_BR_SERROR; + break; + case BRBINFx_EL1_TYPE_INSN_DEBUG: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_INST; + break; + case BRBINFx_EL1_TYPE_DATA_DEBUG: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_DATA; + break; + case BRBINFx_EL1_TYPE_ALIGN_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_ALGN; + break; + case BRBINFx_EL1_TYPE_INSN_FAULT: + 
entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_INST; + break; + case BRBINFx_EL1_TYPE_DATA_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_DATA; + break; + case BRBINFx_EL1_TYPE_FIQ: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_FIQ; + break; + case BRBINFx_EL1_TYPE_DEBUG_EXIT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_EXIT; + break; + default: + pr_warn_once("%d - unknown branch type captured\n", brbe_type); + entry->type = PERF_BR_UNKNOWN; + break; + } +} + +static int brbe_get_perf_priv(u64 brbinf) +{ + int brbe_el = brbe_get_el(brbinf); + + switch (brbe_el) { + case BRBINFx_EL1_EL_EL0: + return PERF_BR_PRIV_USER; + case BRBINFx_EL1_EL_EL1: + return PERF_BR_PRIV_KERNEL; + case BRBINFx_EL1_EL_EL2: + if (is_kernel_in_hyp_mode()) + return PERF_BR_PRIV_KERNEL; + return PERF_BR_PRIV_HV; + default: + pr_warn_once("%d - unknown branch privilege captured\n", brbe_el); + return PERF_BR_PRIV_UNKNOWN; + } +} + +static void capture_brbe_flags(struct perf_branch_entry *entry, struct perf_event *event, + u64 brbinf) +{ + if (branch_sample_type(event)) + brbe_set_perf_entry_type(entry, brbinf); + + if (!branch_sample_no_cycles(event)) + entry->cycles = brbe_get_cycles(brbinf); + + if (!branch_sample_no_flags(event)) { + /* + * BRBINFx_EL1.LASTFAILED indicates that a TME transaction failed (or + * was cancelled) prior to this record, and some number of records + * prior to this one, may have been generated during an attempt to + * execute the transaction. + * + * We will remove such entries later in process_branch_aborts(). + */ + entry->abort = brbe_get_lastfailed(brbinf); + + /* + * All these information (i.e transaction state and mispredicts) + * are available for source only and complete branch records. + */ + if (brbe_record_is_complete(brbinf) || + brbe_record_is_source_only(brbinf)) { + entry->mispred = brbe_get_mispredict(brbinf); + entry->predicted = !entry->mispred; + entry->in_tx = brbe_get_in_tx(brbinf); + } + } + + if (branch_sample_priv(event)) { + /* + * All these information (i.e branch privilege level) are + * available for target only and complete branch records. + */ + if (brbe_record_is_complete(brbinf) || + brbe_record_is_target_only(brbinf)) + entry->priv = brbe_get_perf_priv(brbinf); + } +} + +/* + * A branch record with BRBINFx_EL1.LASTFAILED set, implies that all + * preceding consecutive branch records, that were in a transaction + * (i.e their BRBINFx_EL1.TX set) have been aborted. + * + * Similarly BRBFCR_EL1.LASTFAILED set, indicate that all preceding + * consecutive branch records up to the last record, which were in a + * transaction (i.e their BRBINFx_EL1.TX set) have been aborted. 
+ * + * --------------------------------- ------------------- + * | 00 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX success] + * --------------------------------- ------------------- + * | 01 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX success] + * --------------------------------- ------------------- + * | 02 | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 0 | + * --------------------------------- ------------------- + * | 03 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed] + * --------------------------------- ------------------- + * | 04 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed] + * --------------------------------- ------------------- + * | 05 | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 1 | + * --------------------------------- ------------------- + * | .. | BRBSRC | BRBTGT | BRBINF | | TX = 0 | LF = 0 | + * --------------------------------- ------------------- + * | 61 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed] + * --------------------------------- ------------------- + * | 62 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed] + * --------------------------------- ------------------- + * | 63 | BRBSRC | BRBTGT | BRBINF | | TX = 1 | LF = 0 | [TX failed] + * --------------------------------- ------------------- + * + * BRBFCR_EL1.LASTFAILED == 1 + * + * BRBFCR_EL1.LASTFAILED fails all those consecutive, in transaction + * branches records near the end of the BRBE buffer. + * + * Architecture does not guarantee a non transaction (TX = 0) branch + * record between two different transactions. So it is possible that + * a subsequent lastfailed record (TX = 0, LF = 1) might erroneously + * mark more than required transactions as aborted. + */ +static void process_branch_aborts(struct pmu_hw_events *cpuc) +{ + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + bool lastfailed = !!(brbfcr & BRBFCR_EL1_LASTFAILED); + int idx = brbe_get_numrec(cpuc->percpu_pmu->reg_brbidr) - 1; + struct perf_branch_entry *entry; + + do { + entry = &cpuc->branches->branch_entries[idx]; + if (entry->in_tx) { + entry->abort = lastfailed; + } else { + lastfailed = entry->abort; + entry->abort = false; + } + } while (idx--, idx >= 0); +} + +static void brbe_regset_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event, + struct brbe_regset *regset, int idx) +{ + struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx]; + u64 brbinf = regset[idx].brbinf; + + perf_clear_branch_entry_bitfields(entry); + if (brbe_record_is_complete(brbinf)) { + entry->from = regset[idx].brbsrc; + entry->to = regset[idx].brbtgt; + } else if (brbe_record_is_source_only(brbinf)) { + entry->from = regset[idx].brbsrc; + entry->to = 0; + } else if (brbe_record_is_target_only(brbinf)) { + entry->from = 0; + entry->to = regset[idx].brbtgt; + } + capture_brbe_flags(entry, event, brbinf); +} + +static void process_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event, + struct brbe_regset *regset, int nr_regset) +{ + int idx; + + for (idx = 0; idx < nr_regset; idx++) + brbe_regset_branch_entries(cpuc, event, regset, idx); + + cpuc->branches->branch_stack.nr = nr_regset; + cpuc->branches->branch_stack.hw_idx = -1ULL; +} + +void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) +{ + struct arm64_perf_task_context *task_ctx = event->ctx->task_ctx_data; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + u64 brbfcr, brbcr; + + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbfcr = 
read_sysreg_s(SYS_BRBFCR_EL1); + + /* Ensure pause on PMU interrupt is enabled */ + WARN_ON_ONCE(!(brbcr & BRBCR_ELx_FZP)); + + /* Pause the buffer */ + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + /* + * Overflown event's branch_sample_type does not match the configured + * branch filters in the BRBE HW. So the captured branch records here + * cannot be co-related to the overflown event. Report to the user as + * if no branch records have been captured, and flush branch records. + */ + if ((cpuc->brbe_sample_type != event->attr.branch_sample_type) || + (event->ctx->task && cpuc->brbe_context != event->ctx)) { + cpuc->branches->branch_stack.nr = 0; + cpuc->branches->branch_stack.hw_idx = -1ULL; + goto unpause_reset; + } + + nr_hw_entries = brbe_get_numrec(cpuc->percpu_pmu->reg_brbidr); + nr_live = capture_brbe_regset(live, nr_hw_entries); + if (event->ctx->task) { + nr_store = task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + process_branch_entries(cpuc, event, task_ctx->store, nr_store); + task_ctx->nr_brbe_records = 0; + } else { + process_branch_entries(cpuc, event, live, nr_live); + } + process_branch_aborts(cpuc); + +unpause_reset: + /* Unpause the buffer */ + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + armv8pmu_branch_reset(); +} diff --git a/drivers/perf/arm_brbe.h b/drivers/perf/arm_brbe.h new file mode 100644 index 000000000000..5ba6cf1b0099 --- /dev/null +++ b/drivers/perf/arm_brbe.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Branch Record Buffer Extension Helpers. + * + * Copyright (C) 2022-2023 ARM Limited + * + * Author: Anshuman Khandual + */ +#define pr_fmt(fmt) "brbe: " fmt + +#include + +#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \ + BRBFCR_EL1_INDIRECT | \ + BRBFCR_EL1_RTN | \ + BRBFCR_EL1_INDCALL | \ + BRBFCR_EL1_DIRCALL | \ + BRBFCR_EL1_CONDDIR) + +#define BRBFCR_EL1_DEFAULT_CONFIG (BRBFCR_EL1_BANK_MASK | \ + BRBFCR_EL1_PAUSED | \ + BRBFCR_EL1_EnI | \ + BRBFCR_EL1_BRANCH_FILTERS) + +/* + * BRBTS_EL1 is currently not used for branch stack implementation + * purpose but BRBCR_ELx.TS needs to have a valid value from all + * available options. BRBCR_ELx_TS_VIRTUAL is selected for this. + */ +#define BRBCR_ELx_DEFAULT_TS FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL) + +#define BRBCR_ELx_CONFIG_MASK (BRBCR_ELx_EXCEPTION | \ + BRBCR_ELx_ERTN | \ + BRBCR_ELx_CC | \ + BRBCR_ELx_MPRED | \ + BRBCR_ELx_ExBRE | \ + BRBCR_ELx_E0BRE | \ + BRBCR_ELx_FZP | \ + BRBCR_ELx_TS_MASK) +/* + * BRBE Buffer Organization + * + * BRBE buffer is arranged as multiple banks of 32 branch record + * entries each. An individual branch record in a given bank could + * be accessed, after selecting the bank in BRBFCR_EL1.BANK and + * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with + * indices [0..31]. + * + * Bank 0 + * + * --------------------------------- ------ + * | 00 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 01 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. 
| + * --------------------------------- ------ + * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + * + * Bank 1 + * + * --------------------------------- ------ + * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + */ +#define BRBE_BANK_MAX_ENTRIES 32 +#define BRBE_MAX_BANK 2 +#define BRBE_MAX_ENTRIES (BRBE_BANK_MAX_ENTRIES * BRBE_MAX_BANK) + +#define BRBE_BANK0_IDX_MIN 0 +#define BRBE_BANK0_IDX_MAX 31 +#define BRBE_BANK1_IDX_MIN 32 +#define BRBE_BANK1_IDX_MAX 63 + +struct brbe_regset { + unsigned long brbsrc; + unsigned long brbtgt; + unsigned long brbinf; +}; + +struct arm64_perf_task_context { + struct brbe_regset store[BRBE_MAX_ENTRIES]; + int nr_brbe_records; +}; + +struct brbe_hw_attr { + int brbe_version; + int brbe_cc; + int brbe_nr; + int brbe_format; +}; + +enum brbe_bank_idx { + BRBE_BANK_IDX_INVALID = -1, + BRBE_BANK_IDX_0, + BRBE_BANK_IDX_1, + BRBE_BANK_IDX_MAX +}; + +#define RETURN_READ_BRBSRCN(n) \ + read_sysreg_s(SYS_BRBSRC##n##_EL1) + +#define RETURN_READ_BRBTGTN(n) \ + read_sysreg_s(SYS_BRBTGT##n##_EL1) + +#define RETURN_READ_BRBINFN(n) \ + read_sysreg_s(SYS_BRBINF##n##_EL1) + +#define BRBE_REGN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + case 0: return case_macro(0); break; \ + case 1: return case_macro(1); break; \ + case 2: return case_macro(2); break; \ + case 3: return case_macro(3); break; \ + case 4: return case_macro(4); break; \ + case 5: return case_macro(5); break; \ + case 6: return case_macro(6); break; \ + case 7: return case_macro(7); break; \ + case 8: return case_macro(8); break; \ + case 9: return case_macro(9); break; \ + case 10: return case_macro(10); break; \ + case 11: return case_macro(11); break; \ + case 12: return case_macro(12); break; \ + case 13: return case_macro(13); break; \ + case 14: return case_macro(14); break; \ + case 15: return case_macro(15); break; \ + case 16: return case_macro(16); break; \ + case 17: return case_macro(17); break; \ + case 18: return case_macro(18); break; \ + case 19: return case_macro(19); break; \ + case 20: return case_macro(20); break; \ + case 21: return case_macro(21); break; \ + case 22: return case_macro(22); break; \ + case 23: return case_macro(23); break; \ + case 24: return case_macro(24); break; \ + case 25: return case_macro(25); break; \ + case 26: return case_macro(26); break; \ + case 27: return case_macro(27); break; \ + case 28: return case_macro(28); break; \ + case 29: return case_macro(29); break; \ + case 30: return case_macro(30); break; \ + case 31: return case_macro(31); break; \ + default: \ + pr_warn("unknown register index\n"); \ + return -1; \ + } \ + } while (0) + +static inline int buffer_to_brbe_idx(int buffer_idx) +{ + return buffer_idx % BRBE_BANK_MAX_ENTRIES; +} + +static inline u64 get_brbsrc_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBSRCN); +} + +static inline u64 get_brbtgt_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBTGTN); +} + +static inline u64 get_brbinf_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, 
RETURN_READ_BRBINFN);
+}
+
+static inline u64 brbe_record_valid(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf);
+}
+
+static inline bool brbe_invalid(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE;
+}
+
+static inline bool brbe_record_is_complete(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL;
+}
+
+static inline bool brbe_record_is_source_only(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE;
+}
+
+static inline bool brbe_record_is_target_only(u64 brbinf)
+{
+	return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET;
+}
+
+static inline int brbe_get_in_tx(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf);
+}
+
+static inline int brbe_get_mispredict(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf);
+}
+
+static inline int brbe_get_lastfailed(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf);
+}
+
+static inline int brbe_get_cycles(u64 brbinf)
+{
+	/*
+	 * The captured cycle count is unknown and hence
+	 * should not be passed on to user space.
+	 */
+	if (brbinf & BRBINFx_EL1_CCU)
+		return 0;
+
+	return FIELD_GET(BRBINFx_EL1_CC_MASK, brbinf);
+}
+
+static inline int brbe_get_type(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf);
+}
+
+static inline int brbe_get_el(u64 brbinf)
+{
+	return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf);
+}
+
+static inline int brbe_get_numrec(u64 brbidr)
+{
+	return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr);
+}
+
+static inline int brbe_get_format(u64 brbidr)
+{
+	return FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr);
+}
+
+static inline int brbe_get_cc_bits(u64 brbidr)
+{
+	return FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr);
+}
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 72d34c8bff05..a1efbff49346 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -334,6 +334,12 @@ armpmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
+	/*
+	 * Reset branch records buffer if a new task event gets
+	 * scheduled on a PMU which might have existing records.
+	 * Otherwise older branch records present in the buffer
+	 * might leak into the new task event.
+	 */
 	if (event->ctx->task && hw_events->brbe_context != event->ctx) {
 		hw_events->brbe_context = event->ctx;
 		if (armpmu->branch_reset)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 6cf61cef771a..84460cc91ad7 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -130,6 +130,11 @@ struct arm_pmu {
 	/* the attr_groups array must be NULL-terminated */
 	const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
 
+#ifdef CONFIG_ARM64_BRBE
+	/* store the BRBIDR0_EL1 capturing attributes */
+	u64 reg_brbidr;
+#endif
+
 	/* Only to be used by ACPI probing code */
 	unsigned long acpi_cpuid;
 };
--
Gitee

From 679ae526a8364b0740c5fd41e2d44dd72de761d4 Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Wed, 29 Nov 2023 11:54:35 +0800
Subject: [PATCH 014/173] drivers: perf: Add feature flag check in armpmu_add/del

Function armpmu_add() will call armv8pmu_branch_reset() to reset BRBE.
The BRBE feature depends on ARMV8 PMU hardware support for branch
stack sampling, so on older platforms without it the kernel panics:
  Internal error: Oops - Undefined instruction: 0000000002000000 [#1] SMP
  Call trace:
   armv8pmu_branch_reset+0xc/0x20
   event_sched_in+0xc8/0x1a0
   merge_sched_in+0x16c/0x41c

Fixes: 2690e65ac9f9 ("drivers: perf: arm_pmuv3: Enable branch stack sampling via FEAT_BRBE")
Signed-off-by: Junhao He
Signed-off-by: huwentao
---
 drivers/perf/arm_pmu.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index a1efbff49346..1d9ef5065db3 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -313,10 +313,12 @@ armpmu_del(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	WARN_ON_ONCE(!hw_events->brbe_users);
-	hw_events->brbe_users--;
-	if (!hw_events->brbe_users)
-		hw_events->brbe_context = NULL;
+	if (has_branch_stack(event)) {
+		WARN_ON_ONCE(!hw_events->brbe_users);
+		hw_events->brbe_users--;
+		if (!hw_events->brbe_users)
+			hw_events->brbe_context = NULL;
+	}
 
 	armpmu_stop(event, PERF_EF_UPDATE);
 	hw_events->events[idx] = NULL;
@@ -334,18 +336,20 @@ armpmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
-	/*
-	 * Reset branch records buffer if a new task event gets
-	 * scheduled on a PMU which might have existing records.
-	 * Otherwise older branch records present in the buffer
-	 * might leak into the new task event.
-	 */
-	if (event->ctx->task && hw_events->brbe_context != event->ctx) {
-		hw_events->brbe_context = event->ctx;
-		if (armpmu->branch_reset)
-			armpmu->branch_reset();
+	if (has_branch_stack(event)) {
+		/*
+		 * Reset branch records buffer if a new task event gets
+		 * scheduled on a PMU which might have existing records.
+		 * Otherwise older branch records present in the buffer
+		 * might leak into the new task event.
+		 */
+		if (event->ctx->task && hw_events->brbe_context != event->ctx) {
+			hw_events->brbe_context = event->ctx;
+			if (armpmu->branch_reset)
+				armpmu->branch_reset();
+		}
+		hw_events->brbe_users++;
 	}
-	hw_events->brbe_users++;
 
 	/* An event following a process won't be stopped earlier */
 	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
--
Gitee

From 28615aff1ebf163eb7aaed98b4a847b6271a293c Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Wed, 12 Mar 2025 09:38:58 +0800
Subject: [PATCH 015/173] perf: Configure BRBE correctly on VHE host

The BRBE support backported from v18 didn't configure BRBCR_EL1
correctly, which caused some key functions, such as FZP, to be missed
during sampling. In VHE mode with MDCR_EL2.HPMN set to PMCR_EL0.N, the
counters are controlled by BRBCR_EL1 rather than BRBCR_EL2 (which
writes to BRBCR_EL1 are redirected to). Use the same value for both
registers, except keep EL1 and EL0 recording disabled in guests.

The fix is backported from the v19 version [1].
[1] https://lore.kernel.org/linux-arm-kernel/20250202-arm-brbe-v19-v19-0-1c1300802385@kernel.org/T/#m887b1dcf7f6784b7293626952052f27b68784beb

Fixes: 6c6848e7e00c ("drivers: perf: arm_pmuv3: Enable branch stack sampling via FEAT_BRBE")
Signed-off-by: Yicong Yang
Signed-off-by: Qizhi Zhang
Signed-off-by: huwentao
---
 drivers/perf/arm_brbe.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c
index 1ffa41e355cf..9c9cd7d44e71 100644
--- a/drivers/perf/arm_brbe.c
+++ b/drivers/perf/arm_brbe.c
@@ -476,6 +476,16 @@ void armv8pmu_branch_enable(struct perf_event *event)
 	brbcr &= ~BRBCR_ELx_CONFIG_MASK;
 	brbcr |= branch_type_to_brbcr(cpuc->brbe_sample_type);
 	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+
+	/*
+	 * In VHE mode with MDCR_EL2.HPMN set to PMCR_EL0.N, the counters are
+	 * controlled by BRBCR_EL1 rather than BRBCR_EL2 (which writes to
+	 * BRBCR_EL1 are redirected to). Use the same value for both registers,
+	 * except keep EL1 and EL0 recording disabled in guests.
+	 */
+	if (is_kernel_in_hyp_mode())
+		write_sysreg_s(brbcr & ~(BRBCR_ELx_ExBRE | BRBCR_ELx_E0BRE), SYS_BRBCR_EL12);
+
 	isb();
 }
@@ -494,6 +504,11 @@ void armv8pmu_branch_disable(struct perf_event *event)
 	brbcr &= ~(BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE);
 	brbfcr |= BRBFCR_EL1_PAUSED;
 	write_sysreg_s(brbcr, SYS_BRBCR_EL1);
+
+	/* See the comment in armv8pmu_branch_enable() */
+	if (is_kernel_in_hyp_mode())
+		write_sysreg_s(brbcr, SYS_BRBCR_EL12);
+
 	write_sysreg_s(brbfcr, SYS_BRBFCR_EL1);
 	isb();
 }
--
Gitee

From 778ab6a1966a46776c5a3a124abe7ebfbf5bcc45 Mon Sep 17 00:00:00 2001
From: Huisong Li
Date: Fri, 1 Dec 2023 11:45:30 +0800
Subject: [PATCH 016/173] soc: hisilicon: kunpeng_hccs: Fix some incorrect format strings

[Upstream commit 734add1a278f05db2d5a0b9d280a8bce94ce5eeb]

Fix some incorrect format strings: the channel and topology IDs are
unsigned, so print them with "%u" rather than "%d".
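As a minimal illustration of the bug class (hypothetical snippet, not
taken from the driver): an unsigned value printed with a signed
conversion specifier is misinterpreted once its top bit is set.

  u32 chan_id = 0x80000001;		/* any value with the top bit set */

  pr_info("channel-%d\n", chan_id);	/* mismatched: may print -2147483647 */
  pr_info("channel-%u\n", chan_id);	/* correct: prints 2147483649 */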
Signed-off-by: Huisong Li
Reviewed-by: Jonathan Cameron
Signed-off-by: Wei Xu
Signed-off-by: huwentao
---
 drivers/soc/hisilicon/kunpeng_hccs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/soc/hisilicon/kunpeng_hccs.c b/drivers/soc/hisilicon/kunpeng_hccs.c
index 3f2d4aa7b399..236aed2a9406 100644
--- a/drivers/soc/hisilicon/kunpeng_hccs.c
+++ b/drivers/soc/hisilicon/kunpeng_hccs.c
@@ -174,7 +174,7 @@ static int hccs_register_pcc_channel(struct hccs_dev *hdev)
 	cl_info->pcc_comm_addr = ioremap(pcc_chan->shmem_base_addr,
 					 pcc_chan->shmem_size);
 	if (!cl_info->pcc_comm_addr) {
-		dev_err(dev, "Failed to ioremap PCC communication region for channel-%d.\n",
+		dev_err(dev, "Failed to ioremap PCC communication region for channel-%u.\n",
 			hdev->chan_id);
 		rc = -ENOMEM;
 		goto err_mbx_channel_free;
@@ -1161,7 +1161,7 @@ static int hccs_create_hccs_dir(struct hccs_dev *hdev,
 	int ret;
 
 	ret = kobject_init_and_add(&port->kobj, &hccs_port_type,
-				   &die->kobj, "hccs%d", port->port_id);
+				   &die->kobj, "hccs%u", port->port_id);
 	if (ret) {
 		kobject_put(&port->kobj);
 		return ret;
@@ -1179,7 +1179,7 @@ static int hccs_create_die_dir(struct hccs_dev *hdev,
 	u16 i;
 
 	ret = kobject_init_and_add(&die->kobj, &hccs_die_type,
-				   &chip->kobj, "die%d", die->die_id);
+				   &chip->kobj, "die%u", die->die_id);
 	if (ret) {
 		kobject_put(&die->kobj);
 		return ret;
@@ -1189,7 +1189,7 @@ static int hccs_create_die_dir(struct hccs_dev *hdev,
 		port = &die->ports[i];
 		ret = hccs_create_hccs_dir(hdev, die, port);
 		if (ret) {
-			dev_err(hdev->dev, "create hccs%d dir failed.\n",
+			dev_err(hdev->dev, "create hccs%u dir failed.\n",
 				port->port_id);
 			goto err;
 		}
@@ -1211,7 +1211,7 @@ static int hccs_create_chip_dir(struct hccs_dev *hdev,
 	u16 id;
 
 	ret = kobject_init_and_add(&chip->kobj, &hccs_chip_type,
-				   &hdev->dev->kobj, "chip%d", chip->chip_id);
+				   &hdev->dev->kobj, "chip%u", chip->chip_id);
 	if (ret) {
 		kobject_put(&chip->kobj);
 		return ret;
@@ -1242,7 +1242,7 @@ static int hccs_create_topo_dirs(struct hccs_dev *hdev)
 		chip = &hdev->chips[id];
 		ret = hccs_create_chip_dir(hdev, chip);
 		if (ret) {
-			dev_err(hdev->dev, "init chip%d dir failed!\n", id);
+			dev_err(hdev->dev, "init chip%u dir failed!\n", id);
 			goto err;
 		}
 	}
--
Gitee

From 97611f1c460db5dc2dc6d7665e32de6cd6c0b090 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Fri, 30 Jul 2021 15:44:08 +0800
Subject: [PATCH 017/173] drivers/perf: hisi: Add support for HiSilicon SLLC PMU driver

[Upstream commit 3bf30882c3c7b6e376d9d6d04082c9aa2d2ac30a]

HiSilicon's Hip09 is composed of multiple dies that can be connected
by the SLLC module (Skyros Link Layer Controller). It has separate PMU
registers which the driver can program freely, and an interrupt is
supported to handle counter overflow. Let's support its driver under
the framework of the HiSilicon uncore PMU driver.

SLLC PMU supports the following filter functions (see the usage sketch
below):

* tracetag_en: allows user to count data according to tt_req or
  tt_core set in L3C PMU.
* srcid_cmd & srcid_msk: allows user to filter statistics that come
  from a specific CCL/ICL by configuring the source ID.
* tgtid_hi & tgtid_lo: similarly, event statistics can be filtered on
  operations that go to a specific CCL/ICL by configuring a target ID
  or a target ID range. Like the source ID, the target ID is 11 bits
  wide in the SoC.
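As a usage sketch (event code and filter values are illustrative), the
filters map onto the event's config/config1 fields as declared by the
driver's format attributes further below:

  #include <linux/types.h>

  /*
   * Illustrative only: build the config fields that the perf tool would
   * assemble from a command line such as
   *   perf stat -e hisi_scclX_sllcY/rx_req,srcid_cmd=0x4,srcid_msk=0x7ff,tracetag_en=1/
   */
  static void sllc_encode(__u64 *config, __u64 *config1)
  {
  	*config   = 0x30;		/* event: rx_req (config:0-7) */
  	*config1  = 0;
  	*config1 |= 0x4ULL << 22;	/* srcid_cmd (config1:22-32) */
  	*config1 |= 0x7ffULL << 33;	/* srcid_msk (config1:33-43) */
  	*config1 |= 1ULL << 44;		/* tracetag_en (config1:44) */
  }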
More introduction is added in documentation: Documentation/admin-guide/perf/hisi-pmu.rst Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-8-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Reviewed-by: Shaokun Zhang Signed-off-by: huwentao --- drivers/perf/hisilicon/Makefile | 3 +- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 530 ++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 3 files changed, 533 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 63c558fcffb1..b2b24a2a58c2 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ - hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o + hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o + diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c new file mode 100644 index 000000000000..46be312fa126 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SLLC uncore Hardware event counters support + * + * Copyright (C) 2020 Hisilicon Limited + * Author: Shaokun Zhang + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* SLLC register definition */ +#define SLLC_INT_MASK 0x0814 +#define SLLC_INT_STATUS 0x0818 +#define SLLC_INT_CLEAR 0x081c +#define SLLC_PERF_CTRL 0x1c00 +#define SLLC_SRCID_CTRL 0x1c04 +#define SLLC_TGTID_CTRL 0x1c08 +#define SLLC_EVENT_CTRL 0x1c14 +#define SLLC_EVENT_TYPE0 0x1c18 +#define SLLC_VERSION 0x1cf0 +#define SLLC_EVENT_CNT0_L 0x1d00 + +#define SLLC_EVTYPE_MASK 0xff +#define SLLC_PERF_CTRL_EN BIT(0) +#define SLLC_FILT_EN BIT(1) +#define SLLC_TRACETAG_EN BIT(2) +#define SLLC_SRCID_EN BIT(4) +#define SLLC_SRCID_NONE 0x0 +#define SLLC_TGTID_EN BIT(5) +#define SLLC_TGTID_NONE 0x0 +#define SLLC_TGTID_MIN_SHIFT 1 +#define SLLC_TGTID_MAX_SHIFT 12 +#define SLLC_SRCID_CMD_SHIFT 1 +#define SLLC_SRCID_MSK_SHIFT 12 +#define SLLC_NR_EVENTS 0x80 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); + +static bool tgtid_is_valid(u32 max, u32 min) +{ + return max > 0 && max >= min; +} + +static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_TRACETAG_EN | SLLC_FILT_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN); + 
writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_config_tgtid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 min = hisi_get_tgtid_min(event); + u32 max = hisi_get_tgtid_max(event); + + if (tgtid_is_valid(max, min)) { + u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT); + + writel(val, sllc_pmu->base + SLLC_TGTID_CTRL); + /* Enable the tgtid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_TGTID_EN | SLLC_FILT_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 min = hisi_get_tgtid_min(event); + u32 max = hisi_get_tgtid_max(event); + + if (tgtid_is_valid(max, min)) { + u32 val; + + writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL); + /* Disable the tgtid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_config_srcid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 val, msk; + + msk = hisi_get_srcid_msk(event); + val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT); + writel(val, sllc_pmu->base + SLLC_SRCID_CTRL); + /* Enable the srcid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_SRCID_EN | SLLC_FILT_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_clear_srcid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 val; + + writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL); + /* Disable the srcid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_sllc_pmu_enable_tracetag(event); + hisi_sllc_pmu_config_srcid(event); + hisi_sllc_pmu_config_tgtid(event); + } +} + +static void hisi_sllc_pmu_clear_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_sllc_pmu_disable_tracetag(event); + hisi_sllc_pmu_clear_srcid(event); + hisi_sllc_pmu_clear_tgtid(event); + } +} + +static u32 hisi_sllc_pmu_get_counter_offset(int idx) +{ + return (SLLC_EVENT_CNT0_L + idx * 8); +} + +static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + return readq(sllc_pmu->base + + hisi_sllc_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, sllc_pmu->base + + hisi_sllc_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, + u32 type) +{ + u32 reg, reg_idx, shift, val; + + /* + * Select the appropriate event select register(SLLC_EVENT_TYPE0/1). + * There are 2 event select registers for the 8 hardware counters. + * Event code is 8-bits and for the former 4 hardware counters, + * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, + * SLLC_EVENT_TYPE1 is chosen. 
+ */ + reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + /* Write event code to SLLC_EVENT_TYPEx Register */ + val = readl(sllc_pmu->base + reg); + val &= ~(SLLC_EVTYPE_MASK << shift); + val |= (type << shift); + writel(val, sllc_pmu->base + reg); +} + +static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_PERF_CTRL_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); +} + +static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_PERF_CTRL_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); +} + +static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); + val |= 1 << hwc->idx; + writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); +} + +static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); +} + +static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_INT_MASK); + /* Write 0 to enable interrupt */ + val &= ~(1 << hwc->idx); + writel(val, sllc_pmu->base + SLLC_INT_MASK); +} + +static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_INT_MASK); + /* Write 1 to mask interrupt */ + val |= 1 << hwc->idx; + writel(val, sllc_pmu->base + SLLC_INT_MASK); +} + +static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu) +{ + return readl(sllc_pmu->base + SLLC_INT_STATUS); +} + +static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx) +{ + writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR); +} + +static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { + { "HISI0263", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + +static int hisi_sllc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *sllc_pmu) +{ + /* + * Use the SCCL_ID and the index ID to identify the SLLC PMU, + * while SCCL_ID is from MPIDR_EL1 by CPU. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &sllc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Cannot read sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", + &sllc_pmu->index_id)) { + dev_err(&pdev->dev, "Cannot read idx-id!\n"); + return -EINVAL; + } + + /* SLLC PMUs only share the same SCCL */ + sllc_pmu->ccl_id = -1; + + sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(sllc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); + return PTR_ERR(sllc_pmu->base); + } + + sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION); + + return 0; +} + +static struct attribute *hisi_sllc_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(tgtid_min, "config1:0-10"), + HISI_PMU_FORMAT_ATTR(tgtid_max, "config1:11-21"), + HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"), + HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"), + HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"), + NULL +}; + +static const struct attribute_group hisi_sllc_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_sllc_pmu_v2_format_attr, +}; + +static struct attribute *hisi_sllc_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_req, 0x30), + HISI_PMU_EVENT_ATTR(rx_data, 0x31), + HISI_PMU_EVENT_ATTR(tx_req, 0x34), + HISI_PMU_EVENT_ATTR(tx_data, 0x35), + HISI_PMU_EVENT_ATTR(cycles, 0x09), + NULL +}; + +static const struct attribute_group hisi_sllc_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_sllc_pmu_v2_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { + .attrs = hisi_sllc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_sllc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_sllc_pmu_identifier_attrs[] = { + &hisi_sllc_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_sllc_pmu_identifier_group = { + .attrs = hisi_sllc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { + &hisi_sllc_pmu_v2_format_group, + &hisi_sllc_pmu_v2_events_group, + &hisi_sllc_pmu_cpumask_attr_group, + &hisi_sllc_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { + .write_evtype = hisi_sllc_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_sllc_pmu_start_counters, + .stop_counters = hisi_sllc_pmu_stop_counters, + .enable_counter = hisi_sllc_pmu_enable_counter, + .disable_counter = hisi_sllc_pmu_disable_counter, + .enable_counter_int = hisi_sllc_pmu_enable_counter_int, + .disable_counter_int = hisi_sllc_pmu_disable_counter_int, + .write_counter = hisi_sllc_pmu_write_counter, + .read_counter = hisi_sllc_pmu_read_counter, + .get_int_status = hisi_sllc_pmu_get_int_status, + .clear_int_status = hisi_sllc_pmu_clear_int_status, + .enable_filter = hisi_sllc_pmu_enable_filter, + .disable_filter = hisi_sllc_pmu_clear_filter, +}; + +static int hisi_sllc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *sllc_pmu) +{ + int ret; + + ret = hisi_sllc_pmu_init_data(pdev, sllc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(sllc_pmu, pdev); + if (ret) + return ret; + + sllc_pmu->pmu_events.attr_groups = 
hisi_sllc_pmu_v2_attr_groups; + sllc_pmu->ops = &hisi_uncore_sllc_ops; + sllc_pmu->check_event = SLLC_NR_EVENTS; + sllc_pmu->counter_bits = 64; + sllc_pmu->num_counters = 8; + sllc_pmu->dev = &pdev->dev; + sllc_pmu->on_cpu = -1; + + return 0; +} + +static int hisi_sllc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *sllc_pmu; + char *name; + int ret; + + sllc_pmu = devm_kzalloc(&pdev->dev, sizeof(*sllc_pmu), GFP_KERNEL); + if (!sllc_pmu) + return -ENOMEM; + + ret = hisi_sllc_pmu_dev_probe(pdev, sllc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", + sllc_pmu->sccl_id, sllc_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + return ret; + } + + sllc_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = sllc_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); + if (ret) { + dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); + irq_set_affinity_hint(sllc_pmu->irq, NULL); + return ret; + } + + platform_set_drvdata(pdev, sllc_pmu); + + return ret; +} + +static int hisi_sllc_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&sllc_pmu->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); + irq_set_affinity_hint(sllc_pmu->irq, NULL); + + return 0; +} + +static struct platform_driver hisi_sllc_pmu_driver = { + .driver = { + .name = "hisi_sllc_pmu", + .acpi_match_table = hisi_sllc_pmu_acpi_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_sllc_pmu_probe, + .remove = hisi_sllc_pmu_remove, +}; + +static int __init hisi_sllc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + "AP_PERF_ARM_HISI_SLLC_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("SLLC PMU: cpuhp state setup failed, ret = %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_sllc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE); + + return ret; +} +module_init(hisi_sllc_pmu_module_init); + +static void __exit hisi_sllc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_sllc_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE); +} +module_exit(hisi_sllc_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Shaokun Zhang "); +MODULE_AUTHOR("Qi Liu "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4ae8482e556a..8e6e95a8bd34 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -167,6 +167,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, #ifndef __GENKSYMS__ 
 	CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE,
 #endif
--
Gitee

From 25d72c7badd9024361c93218af603a5e9f6b6a01 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Fri, 30 Jul 2021 15:44:09 +0800
Subject: [PATCH 018/173] drivers/perf: hisi: Add support for HiSilicon PA PMU
 driver

[Upstream commit a0ab25cd82eeb68bfa19a4d93a097521af5011b8]

On the HiSilicon Hip09 platform, there is a PA (Protocol Adapter) module
on each chip SICL (Super I/O Cluster) which incorporates three Hydra
interfaces and facilitates cache coherency between the dies on the chip.
The PA uncore PMU model is the same as that of the other Hip09 PMU
modules, and many PMU events are supported. Let's support the PMU driver
using the HiSilicon uncore PMU framework.

The PA PMU supports the following filter functions:

* tracetag_en: allows the user to count events according to tt_req or
  tt_core set in the L3C PMU. It's the same as on the other PMUs.

* srcid_cmd & srcid_msk: allow the user to filter statistics that come
  from a specific CCL/ICL by configuring the source ID.

* tgtid_cmd & tgtid_msk: similar in function to srcid_cmd & srcid_msk.
  Both pairs are used to check where the data comes from or goes to.

Cc: Mark Rutland
Cc: Will Deacon
Cc: John Garry
Cc: Jonathan Cameron
Reviewed-by: John Garry
Co-developed-by: Qi Liu
Signed-off-by: Qi Liu
Signed-off-by: Shaokun Zhang
Link: https://lore.kernel.org/r/1615186237-22263-9-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon
Reviewed-by: Shaokun Zhang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/Makefile             |   3 +-
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 500 ++++++++++++++++++++
 include/linux/cpuhotplug.h                  |   1 +
 3 files changed, 503 insertions(+), 1 deletion(-)
 create mode 100644 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c

diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index b2b24a2a58c2..07b80c5b4fc6 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
-	hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o
+	hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
+	hisi_uncore_pa_pmu.o

 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o

diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
new file mode 100644
index 000000000000..5517a90552ec
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon PA uncore Hardware event counters support
+ *
+ * Copyright (C) 2020 HiSilicon Limited
+ * Author: Shaokun Zhang
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
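+ *
+ * Filter usage sketch (the sysfs device name, e.g. hisi_sicl0_pa0, and
+ * the srcid values below depend on the platform topology):
+ *
+ *   perf stat -e hisi_sicl0_pa0/rx_req,srcid_cmd=0x4,srcid_msk=0x7ff/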
+ */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* PA register definition */ +#define PA_PERF_CTRL 0x1c00 +#define PA_EVENT_CTRL 0x1c04 +#define PA_TT_CTRL 0x1c08 +#define PA_TGTID_CTRL 0x1c14 +#define PA_SRCID_CTRL 0x1c18 +#define PA_INT_MASK 0x1c70 +#define PA_INT_STATUS 0x1c78 +#define PA_INT_CLEAR 0x1c7c +#define PA_EVENT_TYPE0 0x1c80 +#define PA_PMU_VERSION 0x1cf0 +#define PA_EVENT_CNT0_L 0x1d00 + +#define PA_EVTYPE_MASK 0xff +#define PA_NR_COUNTERS 0x8 +#define PA_PERF_CTRL_EN BIT(0) +#define PA_TRACETAG_EN BIT(4) +#define PA_TGTID_EN BIT(11) +#define PA_SRCID_EN BIT(11) +#define PA_TGTID_NONE 0 +#define PA_SRCID_NONE 0 +#define PA_TGTID_MSK_SHIFT 12 +#define PA_SRCID_MSK_SHIFT 12 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_cmd, config1, 10, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_msk, config1, 21, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); + +static void hisi_pa_pmu_enable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(pa_pmu->base + PA_TT_CTRL); + val |= PA_TRACETAG_EN; + writel(val, pa_pmu->base + PA_TT_CTRL); + } +} + +static void hisi_pa_pmu_clear_tracetag(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(pa_pmu->base + PA_TT_CTRL); + val &= ~PA_TRACETAG_EN; + writel(val, pa_pmu->base + PA_TT_CTRL); + } +} + +static void hisi_pa_pmu_config_tgtid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_tgtid_cmd(event); + + if (cmd) { + u32 msk = hisi_get_tgtid_msk(event); + u32 val = cmd | PA_TGTID_EN | (msk << PA_TGTID_MSK_SHIFT); + + writel(val, pa_pmu->base + PA_TGTID_CTRL); + } +} + +static void hisi_pa_pmu_clear_tgtid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_tgtid_cmd(event); + + if (cmd) + writel(PA_TGTID_NONE, pa_pmu->base + PA_TGTID_CTRL); +} + +static void hisi_pa_pmu_config_srcid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 msk = hisi_get_srcid_msk(event); + u32 val = cmd | PA_SRCID_EN | (msk << PA_SRCID_MSK_SHIFT); + + writel(val, pa_pmu->base + PA_SRCID_CTRL); + } +} + +static void hisi_pa_pmu_clear_srcid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) + writel(PA_SRCID_NONE, pa_pmu->base + PA_SRCID_CTRL); +} + +static void hisi_pa_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_pa_pmu_enable_tracetag(event); + hisi_pa_pmu_config_srcid(event); + hisi_pa_pmu_config_tgtid(event); + } +} + +static void hisi_pa_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_pa_pmu_clear_tgtid(event); + hisi_pa_pmu_clear_srcid(event); + hisi_pa_pmu_clear_tracetag(event); + } +} + +static u32 hisi_pa_pmu_get_counter_offset(int idx) +{ + return (PA_EVENT_CNT0_L + idx * 8); +} + +static u64 hisi_pa_pmu_read_counter(struct hisi_pmu *pa_pmu, + struct hw_perf_event *hwc) +{ + return readq(pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx)); +} + +static void 
hisi_pa_pmu_write_counter(struct hisi_pmu *pa_pmu,
+				      struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_pa_pmu_write_evtype(struct hisi_pmu *pa_pmu, int idx,
+				     u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register(PA_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event code is 8-bits and for the former 4 hardware counters,
+	 * PA_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * PA_EVENT_TYPE1 is chosen.
+	 */
+	reg = PA_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to PA_EVENT_TYPEx Register */
+	val = readl(pa_pmu->base + reg);
+	val &= ~(PA_EVTYPE_MASK << shift);
+	val |= (type << shift);
+	writel(val, pa_pmu->base + reg);
+}
+
+static void hisi_pa_pmu_start_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val |= PA_PERF_CTRL_EN;
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_stop_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val &= ~(PA_PERF_CTRL_EN);
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter(struct hisi_pmu *pa_pmu,
+				       struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu,
+					   struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(pa_pmu->base + PA_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_INT_MASK);
+}
+
+static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(pa_pmu->base + PA_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_INT_MASK);
+}
+
+static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu)
+{
+	return readl(pa_pmu->base + PA_INT_STATUS);
+}
+
+static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
+{
+	writel(1 << idx, pa_pmu->base + PA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
+	{ "HISI0273", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+
+static int hisi_pa_pmu_init_data(struct platform_device *pdev,
+				 struct hisi_pmu *pa_pmu)
+{
+	/*
+	 * Use the SCCL_ID and the index ID to identify the PA PMU,
+	 * while SCCL_ID is the nearest SCCL_ID from this SICL and
+	 * CPU core is chosen from this SCCL to manage this PMU.
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &pa_pmu->sccl_id)) { + dev_err(&pdev->dev, "Cannot read sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", + &pa_pmu->index_id)) { + dev_err(&pdev->dev, "Cannot read idx-id!\n"); + return -EINVAL; + } + + pa_pmu->ccl_id = -1; + + pa_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pa_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n"); + return PTR_ERR(pa_pmu->base); + } + + pa_pmu->identifier = readl(pa_pmu->base + PA_PMU_VERSION); + + return 0; +} + +static struct attribute *hisi_pa_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(tgtid_cmd, "config1:0-10"), + HISI_PMU_FORMAT_ATTR(tgtid_msk, "config1:11-21"), + HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"), + HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"), + HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"), + NULL, +}; + +static const struct attribute_group hisi_pa_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_pa_pmu_v2_format_attr, +}; + +static struct attribute *hisi_pa_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_req, 0x40), + HISI_PMU_EVENT_ATTR(tx_req, 0x5c), + HISI_PMU_EVENT_ATTR(cycle, 0x78), + NULL +}; + +static const struct attribute_group hisi_pa_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_pa_pmu_v2_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { + .attrs = hisi_pa_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_pa_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pa_pmu_identifier_attrs[] = { + &hisi_pa_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_pa_pmu_identifier_group = { + .attrs = hisi_pa_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_pa_pmu_v2_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_pa_ops = { + .write_evtype = hisi_pa_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_pa_pmu_start_counters, + .stop_counters = hisi_pa_pmu_stop_counters, + .enable_counter = hisi_pa_pmu_enable_counter, + .disable_counter = hisi_pa_pmu_disable_counter, + .enable_counter_int = hisi_pa_pmu_enable_counter_int, + .disable_counter_int = hisi_pa_pmu_disable_counter_int, + .write_counter = hisi_pa_pmu_write_counter, + .read_counter = hisi_pa_pmu_read_counter, + .get_int_status = hisi_pa_pmu_get_int_status, + .clear_int_status = hisi_pa_pmu_clear_int_status, + .enable_filter = hisi_pa_pmu_enable_filter, + .disable_filter = hisi_pa_pmu_disable_filter, +}; + +static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *pa_pmu) +{ + int ret; + + ret = hisi_pa_pmu_init_data(pdev, pa_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(pa_pmu, pdev); + if (ret) + return ret; + + pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups; + pa_pmu->num_counters = PA_NR_COUNTERS; + pa_pmu->ops = &hisi_uncore_pa_ops; + pa_pmu->check_event = 0xB0; + pa_pmu->counter_bits = 64; + pa_pmu->dev = &pdev->dev; + pa_pmu->on_cpu = -1; 
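+	/* A valid on_cpu is chosen later by the cpuhp online callback. */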
+ + return 0; +} + +static int hisi_pa_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *pa_pmu; + char *name; + int ret; + + pa_pmu = devm_kzalloc(&pdev->dev, sizeof(*pa_pmu), GFP_KERNEL); + if (!pa_pmu) + return -ENOMEM; + + ret = hisi_pa_pmu_dev_probe(pdev, pa_pmu); + if (ret) + return ret; + /* + * PA is attached in SICL and the CPU core is chosen to manage this + * PMU which is the nearest SCCL, while its SCCL_ID is greater than + * one with the SICL_ID. + */ + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u", + pa_pmu->sccl_id - 1, pa_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + return ret; + } + + pa_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = pa_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&pa_pmu->pmu, name, -1); + if (ret) { + dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); + irq_set_affinity_hint(pa_pmu->irq, NULL); + return ret; + } + + platform_set_drvdata(pdev, pa_pmu); + return ret; +} + +static int hisi_pa_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&pa_pmu->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); + irq_set_affinity_hint(pa_pmu->irq, NULL); + + return 0; +} + +static struct platform_driver hisi_pa_pmu_driver = { + .driver = { + .name = "hisi_pa_pmu", + .acpi_match_table = hisi_pa_pmu_acpi_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_pa_pmu_probe, + .remove = hisi_pa_pmu_remove, +}; + +static int __init hisi_pa_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + "AP_PERF_ARM_HISI_PA_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("PA PMU: cpuhp state setup failed, ret = %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_pa_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE); + + return ret; +} +module_init(hisi_pa_pmu_module_init); + +static void __exit hisi_pa_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_pa_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE); +} +module_exit(hisi_pa_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Shaokun Zhang "); +MODULE_AUTHOR("Qi Liu "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8e6e95a8bd34..1b1ceb465c3d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -167,6 +167,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, #ifndef __GENKSYMS__ CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, -- Gitee From 486691b287cb4028c6050545d7ff4274d5b6894d Mon 
Sep 17 00:00:00 2001
From: Qi Liu
Date: Tue, 9 Aug 2022 23:06:42 +0800
Subject: [PATCH 019/173] drivers/perf: hisi: Associate PMUs in SICL with CPUs
 online

[Upstream commit 807907dae9701c4b0593d5195d4839f17d103314]

If a PMU is in a SICL (Super IO cluster), it is not appropriate to
associate this PMU with a CPU die. So we associate it with all CPUs
online, rather than CPUs in the nearest SCCL.

As the firmware of the Hip09 platform hasn't been published yet, this
change to the PMU driver will not influence backwards compatibility
between the driver and firmware.

Signed-off-by: Qi Liu
Reviewed-by: John Garry
Link: https://lore.kernel.org/r/20220415102352.6665-2-liuqi115@huawei.com
Signed-off-by: Will Deacon
Signed-off-by: Wangming Shao
Reviewed-by: Junhao He
Reviewed-by: Yang Jihong
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 18 +++++++-----------
 drivers/perf/hisilicon/hisi_uncore_pmu.c    |  4 ++++
 drivers/perf/hisilicon/hisi_uncore_pmu.h    |  1 +
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index 5517a90552ec..8bea7ac4f5ad 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -258,13 +258,12 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev,
 				 struct hisi_pmu *pa_pmu)
 {
 	/*
-	 * Use the SCCL_ID and the index ID to identify the PA PMU,
-	 * while SCCL_ID is the nearest SCCL_ID from this SICL and
-	 * CPU core is chosen from this SCCL to manage this PMU.
+	 * As PA PMU is in a SICL, use the SICL_ID and the index ID
+	 * to identify the PA PMU.
 	 */
 	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
-				     &pa_pmu->sccl_id)) {
-		dev_err(&pdev->dev, "Cannot read sccl-id!\n");
+				     &pa_pmu->sicl_id)) {
+		dev_err(&pdev->dev, "Cannot read sicl-id!\n");
 		return -EINVAL;
 	}

@@ -275,6 +274,7 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev,
 	}

 	pa_pmu->ccl_id = -1;
+	pa_pmu->sccl_id = -1;

 	pa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pa_pmu->base)) {
@@ -399,13 +399,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
 	ret = hisi_pa_pmu_dev_probe(pdev, pa_pmu);
 	if (ret)
 		return ret;
-	/*
-	 * PA is attached in SICL and the CPU core is chosen to manage this
-	 * PMU which is the nearest SCCL, while its SCCL_ID is greater than
-	 * one with the SICL_ID.
-	 */
+
 	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u",
-			      pa_pmu->sccl_id - 1, pa_pmu->index_id);
+			      pa_pmu->sicl_id, pa_pmu->index_id);
 	if (!name)
 		return -ENOMEM;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index c7a62a871183..72737f80bb83 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -458,6 +458,10 @@ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
 {
 	int sccl_id, ccl_id;

+	/* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
+	if (hisi_pmu->sccl_id == -1)
+		return true;
+
 	if (hisi_pmu->ccl_id == -1) {
 		/* If CCL_ID is -1, the PMU only shares the same SCCL */
 		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 78c00d2d0ee6..0a983efe9db5 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -80,6 +80,7 @@ struct hisi_pmu {
 	struct device *dev;
 	struct hlist_node node;
 	int sccl_id;
+	int sicl_id;
 	int ccl_id;
 	void __iomem *base;
 	/* the ID of the PMU modules */
--
Gitee

From 37c940fff8948f88a6566bad7599115d1bdb2bae Mon Sep 17 00:00:00 2001
From: Chen Jun
Date: Thu, 19 May 2022 20:09:48 +0800
Subject: [PATCH 020/173] perf: hisi: Make irq shared

On some platforms, there are errors like:

  genirq: Flags mismatch irq 23. 00010804 (xxx) vs. 00010804 (xxx)

The reason is that more than one PMU node uses the same irq number.

Add IRQF_SHARED when calling devm_request_irq().

Signed-off-by: Chen Jun
Reviewed-by: Weilong Chen
Signed-off-by: Zheng Zengkai
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 72737f80bb83..40a70ae8fbed 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -168,7 +168,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
 		return irq;

 	ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
-			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_SHARED,
 			       dev_name(&pdev->dev), hisi_pmu);
 	if (ret < 0) {
 		dev_err(&pdev->dev,
--
Gitee

From d7dd7bac012dcd8bb0c769855f64fa1328dce3ea Mon Sep 17 00:00:00 2001
From: Chen Jun
Date: Thu, 19 May 2022 20:09:49 +0800
Subject: [PATCH 021/173] perf: hisi: Fix read sccl_id and ccl_id error on
 some platforms

On some platforms with TSV200 or CORTEX_A55 CPUs, the sccl and ccl IDs
are kept in aff2 when MT is set.
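Both IDs are therefore split out of aff2 the same way as on TSV110 (a
sketch of the decode this patch applies in hisi_read_sccl_and_ccl_id()):

	sccl = aff2 >> 3;
	ccl = aff2 & 0x7;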
Signed-off-by: Chen Jun Reviewed-by: Weilong Chen Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 40a70ae8fbed..02910e93cf57 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -434,12 +434,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) bool mt = mpidr & MPIDR_MT_BITMASK; int sccl, ccl; - if (mt && read_cpuid_part_number() == HISI_CPU_PART_TSV110) { - sccl = aff2 >> 3; - ccl = aff2 & 0x7; - } else if (mt) { - sccl = aff3; - ccl = aff2; + if (mt) { + switch (read_cpuid_part_number()) { + case HISI_CPU_PART_TSV110: + case HISI_CPU_PART_TSV200: + case ARM_CPU_PART_CORTEX_A55: + sccl = aff2 >> 3; + ccl = aff2 & 0x7; + break; + default: + sccl = aff3; + ccl = aff2; + break; + } } else { sccl = aff2; ccl = aff1; -- Gitee From 2d65d832b83bb6e64c582737e94d7472b216bfbf Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Thu, 19 May 2022 20:09:50 +0800 Subject: [PATCH 022/173] perf: hisi: Add support for HiSilicon SoC L3T PMU Add support for HiSilicon SoC L3T PMU Signed-off-by: Chen Jun Reviewed-by: Weilong Chen Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/Makefile | 3 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c | 403 +++++++++++++++++++ 3 files changed, 405 insertions(+), 3 deletions(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 07b80c5b4fc6..4f10a9c7671c 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_l3t_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o - diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 66227265de70..962cad5d86c8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -612,7 +612,7 @@ static int __init hisi_l3c_pmu_module_init(void) int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, - "AP_PERF_ARM_HISI_L3_ONLINE", + "AP_PERF_ARM_HISI_L3T_ONLINE", hisi_uncore_pmu_online_cpu, hisi_uncore_pmu_offline_cpu); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c new file mode 100644 index 000000000000..f414dc1736aa --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC L3T uncore Hardware event counters support + * + * Copyright (C) 2017 Hisilicon Limited + * Author: Anurup M + * Shaokun Zhang + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. 
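+ *
+ * Usage sketch (the sysfs device name, e.g. hisi_sccl1_l3t0, depends on
+ * the platform topology):
+ *
+ *   perf stat -e hisi_sccl1_l3t0/rd_cpipe/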
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* L3T register definition */ +#define L3T_PERF_CTRL 0x0408 +#define L3T_INT_MASK 0x0800 +#define L3T_INT_STATUS 0x0808 +#define L3T_INT_CLEAR 0x080c +#define L3T_EVENT_CTRL 0x1c00 +#define L3T_VERSION 0x1cf0 +#define L3T_EVENT_TYPE0 0x1d00 +/* + * If the HW version only supports a 48-bit counter, then + * bits [63:48] are reserved, which are Read-As-Zero and + * Writes-Ignored. + */ +#define L3T_CNTR0_LOWER 0x1e00 + +/* L3T has 8-counters */ +#define L3T_NR_COUNTERS 0x8 + +#define L3T_PERF_CTRL_EN 0x20000 +#define L3T_EVTYPE_NONE 0xff +#define L3T_NR_EVENTS 0x59 + +/* + * Select the counter register offset using the counter index + */ +static u32 hisi_l3t_pmu_get_counter_offset(int cntr_idx) +{ + return (L3T_CNTR0_LOWER + (cntr_idx * 8)); +} + +static u64 hisi_l3t_pmu_read_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + return readq(l3t_pmu->base + hisi_l3t_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_l3t_pmu_write_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, l3t_pmu->base + hisi_l3t_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_l3t_pmu_write_evtype(struct hisi_pmu *l3t_pmu, int idx, + u32 type) +{ + u32 reg, reg_idx, shift, val; + + /* + * Select the appropriate event select register(L3T_EVENT_TYPE0/1). + * There are 2 event select registers for the 8 hardware counters. + * Event code is 8-bits and for the former 4 hardware counters, + * L3T_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, + * L3T_EVENT_TYPE1 is chosen. + */ + reg = L3T_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + /* Write event code to L3T_EVENT_TYPEx Register */ + val = readl(l3t_pmu->base + reg); + val &= ~(L3T_EVTYPE_NONE << shift); + val |= (type << shift); + writel(val, l3t_pmu->base + reg); +} + +static void hisi_l3t_pmu_start_counters(struct hisi_pmu *l3t_pmu) +{ + u32 val; + + /* + * Set perf_enable bit in L3T_PERF_CTRL register to start counting + * for all enabled counters. + */ + val = readl(l3t_pmu->base + L3T_PERF_CTRL); + val |= L3T_PERF_CTRL_EN; + writel(val, l3t_pmu->base + L3T_PERF_CTRL); +} + +static void hisi_l3t_pmu_stop_counters(struct hisi_pmu *l3t_pmu) +{ + u32 val; + + /* + * Clear perf_enable bit in L3T_PERF_CTRL register to stop counting + * for all enabled counters. 
+ */ + val = readl(l3t_pmu->base + L3T_PERF_CTRL); + val &= ~(L3T_PERF_CTRL_EN); + writel(val, l3t_pmu->base + L3T_PERF_CTRL); +} + +static void hisi_l3t_pmu_enable_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index in L3T_EVENT_CTRL register */ + val = readl(l3t_pmu->base + L3T_EVENT_CTRL); + val |= (1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_EVENT_CTRL); +} + +static void hisi_l3t_pmu_disable_counter(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index in L3T_EVENT_CTRL register */ + val = readl(l3t_pmu->base + L3T_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_EVENT_CTRL); +} + +static void hisi_l3t_pmu_enable_counter_int(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(l3t_pmu->base + L3T_INT_MASK); + /* Write 0 to enable interrupt */ + val &= ~(1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_INT_MASK); +} + +static void hisi_l3t_pmu_disable_counter_int(struct hisi_pmu *l3t_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(l3t_pmu->base + L3T_INT_MASK); + /* Write 1 to mask interrupt */ + val |= (1 << hwc->idx); + writel(val, l3t_pmu->base + L3T_INT_MASK); +} + +static u32 hisi_l3t_pmu_get_int_status(struct hisi_pmu *l3t_pmu) +{ + return readl(l3t_pmu->base + L3T_INT_STATUS); +} + +static void hisi_l3t_pmu_clear_int_status(struct hisi_pmu *l3t_pmu, int idx) +{ + writel(1 << idx, l3t_pmu->base + L3T_INT_CLEAR); +} + +static const struct acpi_device_id hisi_l3t_pmu_acpi_match[] = { + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3t_pmu_acpi_match); + +static const struct of_device_id l3t_of_match[] = { + { .compatible = "hisilicon,l3t-pmu", }, + {}, +}; + +static int hisi_l3t_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *l3t_pmu) +{ + /* + * Use the SCCL_ID and CCL_ID to identify the L3T PMU, while + * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &l3t_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read l3t sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &l3t_pmu->ccl_id)) { + dev_err(&pdev->dev, "Can not read l3t ccl-id!\n"); + return -EINVAL; + } + + l3t_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(l3t_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for l3t_pmu resource\n"); + return PTR_ERR(l3t_pmu->base); + } + + l3t_pmu->identifier = readl(l3t_pmu->base + L3T_VERSION); + + return 0; +} + +static struct attribute *hisi_l3t_pmu_v1_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL, +}; + +static const struct attribute_group hisi_l3t_pmu_v1_format_group = { + .name = "format", + .attrs = hisi_l3t_pmu_v1_format_attr, +}; + +static struct attribute *hisi_l3t_pmu_v1_events_attr[] = { + HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), + HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), + HISI_PMU_EVENT_ATTR(rd_hit_cpipe, 0x02), + HISI_PMU_EVENT_ATTR(wr_hit_cpipe, 0x03), + HISI_PMU_EVENT_ATTR(victim_num, 0x04), + HISI_PMU_EVENT_ATTR(rd_spipe, 0x20), + HISI_PMU_EVENT_ATTR(wr_spipe, 0x21), + HISI_PMU_EVENT_ATTR(rd_hit_spipe, 0x22), + HISI_PMU_EVENT_ATTR(wr_hit_spipe, 0x23), + HISI_PMU_EVENT_ATTR(back_invalid, 0x29), + HISI_PMU_EVENT_ATTR(retry_cpu, 0x40), + HISI_PMU_EVENT_ATTR(retry_ring, 0x41), + HISI_PMU_EVENT_ATTR(prefetch_drop, 0x42), + NULL, +}; + +static const struct attribute_group hisi_l3t_pmu_v1_events_group = { + .name = "events", + .attrs = hisi_l3t_pmu_v1_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_l3t_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_l3t_pmu_cpumask_attr_group = { + .attrs = hisi_l3t_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_l3t_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_l3t_pmu_identifier_attrs[] = { + &hisi_l3t_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_l3t_pmu_identifier_group = { + .attrs = hisi_l3t_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_l3t_pmu_v1_attr_groups[] = { + &hisi_l3t_pmu_v1_format_group, + &hisi_l3t_pmu_v1_events_group, + &hisi_l3t_pmu_cpumask_attr_group, + &hisi_l3t_pmu_identifier_group, + NULL, +}; + +static const struct hisi_uncore_ops hisi_uncore_l3t_ops = { + .write_evtype = hisi_l3t_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_l3t_pmu_start_counters, + .stop_counters = hisi_l3t_pmu_stop_counters, + .enable_counter = hisi_l3t_pmu_enable_counter, + .disable_counter = hisi_l3t_pmu_disable_counter, + .enable_counter_int = hisi_l3t_pmu_enable_counter_int, + .disable_counter_int = hisi_l3t_pmu_disable_counter_int, + .write_counter = hisi_l3t_pmu_write_counter, + .read_counter = hisi_l3t_pmu_read_counter, + .get_int_status = hisi_l3t_pmu_get_int_status, + .clear_int_status = hisi_l3t_pmu_clear_int_status, +}; + +static int hisi_l3t_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *l3t_pmu) +{ + int ret; + + ret = hisi_l3t_pmu_init_data(pdev, l3t_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(l3t_pmu, pdev); + if (ret) + return ret; + + l3t_pmu->counter_bits = 48; + l3t_pmu->check_event = L3T_NR_EVENTS; + l3t_pmu->pmu_events.attr_groups = hisi_l3t_pmu_v1_attr_groups; + + 
l3t_pmu->num_counters = L3T_NR_COUNTERS; + l3t_pmu->ops = &hisi_uncore_l3t_ops; + l3t_pmu->dev = &pdev->dev; + l3t_pmu->on_cpu = -1; + + return 0; +} + +static int hisi_l3t_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *l3t_pmu; + char *name; + int ret; + + l3t_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3t_pmu), GFP_KERNEL); + if (!l3t_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, l3t_pmu); + + ret = hisi_l3t_pmu_dev_probe(pdev, l3t_pmu); + if (ret) + return ret; + + if (device_property_read_u32(&pdev->dev, "hisilicon,index-id", &l3t_pmu->index_id)) { + dev_err(&pdev->dev, "Can not read l3t index-id!\n"); + return -EINVAL; + } + + /* + * CCL_ID is used to identify the L3T in the same SCCL which was + * used _UID by mistake. + */ + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3t%u", + l3t_pmu->sccl_id, l3t_pmu->index_id); + l3t_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = l3t_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + /* Pick one core to use for cpumask attributes */ + cpumask_set_cpu(smp_processor_id(), &l3t_pmu->associated_cpus); + + l3t_pmu->on_cpu = cpumask_first(&l3t_pmu->associated_cpus); + if (l3t_pmu->on_cpu >= nr_cpu_ids) + return -EINVAL; + + ret = perf_pmu_register(&l3t_pmu->pmu, name, -1); + + return ret; +} + +static int hisi_l3t_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *l3t_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&l3t_pmu->pmu); + + return 0; +} + +static struct platform_driver hisi_l3t_pmu_driver = { + .driver = { + .name = "hisi_l3t_pmu", + .acpi_match_table = ACPI_PTR(hisi_l3t_pmu_acpi_match), + .of_match_table = l3t_of_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_l3t_pmu_probe, + .remove = hisi_l3t_pmu_remove, +}; + +static int __init hisi_l3t_pmu_module_init(void) +{ + int ret; + + ret = platform_driver_register(&hisi_l3t_pmu_driver); + + return ret; +} +module_init(hisi_l3t_pmu_module_init); + +static void __exit hisi_l3t_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_l3t_pmu_driver); +} +module_exit(hisi_l3t_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC L3T uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Anurup M "); +MODULE_AUTHOR("Shaokun Zhang "); -- Gitee From ab46a521266e094f9da9d8747ca092c9ee597ffe Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Thu, 19 May 2022 20:09:51 +0800 Subject: [PATCH 023/173] perf: hisi: Add support for HiSilicon SoC LPDDRC PMU Add support for HiSilicon SoC LPDDRC PMU Signed-off-by: Chen Jun Reviewed-by: Weilong Chen Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/Makefile | 2 +- .../perf/hisilicon/hisi_uncore_lpddrc_pmu.c | 408 ++++++++++++++++++ 2 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 4f10a9c7671c..5e3b8522441b 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ 
 	hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
-	hisi_uncore_pa_pmu.o hisi_uncore_l3t_pmu.o
+	hisi_uncore_pa_pmu.o hisi_uncore_l3t_pmu.o hisi_uncore_lpddrc_pmu.o

 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o

diff --git a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c
new file mode 100644
index 000000000000..03a4bb1a9948
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SoC LPDDRC uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Shaokun Zhang
+ *         Anurup M
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "hisi_uncore_pmu.h"
+
+/* LPDDRC register definition in v1 */
+#define LPDDRC_PERF_CTRL	0x4930
+#define LPDDRC_FLUX_WR		0x4948
+#define LPDDRC_FLUX_RD		0x494c
+#define LPDDRC_FLUX_WCMD	0x4950
+#define LPDDRC_FLUX_RCMD	0x4954
+#define LPDDRC_PRE_CMD		0x4984
+#define LPDDRC_ACT_CMD		0x4988
+#define LPDDRC_RNK_CHG		0x4990
+#define LPDDRC_RW_CHG		0x4994
+#define LPDDRC_EVENT_CTRL	0x4d60
+#define LPDDRC_INT_MASK		0x6c8
+#define LPDDRC_INT_STATUS	0x6cc
+#define LPDDRC_INT_CLEAR	0x6d0
+#define LPDDRC_VERSION		0x710
+
+#define LPDDRC_NR_COUNTERS	0x8
+#define LPDDRC_V1_PERF_CTRL_EN	0x1
+#define LPDDRC_V1_NR_EVENTS	0x7
+
+/*
+ * For PMU v1, there are eight events, and every event has been mapped
+ * to a fixed-purpose counter whose register offset is not consistent.
+ * Therefore there is no write-event-type operation, and we assume that
+ * the event code (0 to 7) is equal to the counter index in the PMU
+ * driver.
+ */
+#define GET_LPDDRC_EVENTID(hwc)	(hwc->config_base & 0x7)
+
+static const u32 lpddrc_reg_off[] = {
+	LPDDRC_FLUX_WR, LPDDRC_FLUX_RD, LPDDRC_FLUX_WCMD, LPDDRC_FLUX_RCMD,
+	LPDDRC_PRE_CMD, LPDDRC_ACT_CMD, LPDDRC_RNK_CHG, LPDDRC_RW_CHG
+};
+
+/*
+ * Select the counter register offset using the counter index.
+ * In PMU v1, there are no programmable counters; the count
+ * is read from the statistics counter register itself.
+ */
+static u32 hisi_lpddrc_pmu_v1_get_counter_offset(int cntr_idx)
+{
+	return lpddrc_reg_off[cntr_idx];
+}
+
+static u64 hisi_lpddrc_pmu_v1_read_counter(struct hisi_pmu *lpddrc_pmu,
+					   struct hw_perf_event *hwc)
+{
+	return readl(lpddrc_pmu->base +
+		     hisi_lpddrc_pmu_v1_get_counter_offset(hwc->idx));
+}
+
+/*
+ * For the LPDDRC PMU, the event counter should be reset when starting
+ * the counters; reset prev_count in software, because the counter
+ * register is RO.
+ */
+static void hisi_lpddrc_pmu_v1_write_counter(struct hisi_pmu *lpddrc_pmu,
+					     struct hw_perf_event *hwc, u64 val)
+{
+	local64_set(&hwc->prev_count, 0);
+}
+
+/*
+ * For LPDDRC PMU v1, events are mapped to fixed-purpose counters by
+ * hardware, so there is no need to write the event type, while PMU v2
+ * uses programmable counters.
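+ * E.g. the flux_wr event (code 0x0) is always counted by the register
+ * at LPDDRC_FLUX_WR.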
+ */
+static void hisi_lpddrc_pmu_write_evtype(struct hisi_pmu *lpddrc_pmu, int idx,
+					 u32 type)
+{
+}
+
+static void hisi_lpddrc_pmu_v1_start_counters(struct hisi_pmu *lpddrc_pmu)
+{
+	u32 val;
+
+	/* Set perf_enable in LPDDRC_PERF_CTRL to start event counting */
+	val = readl(lpddrc_pmu->base + LPDDRC_PERF_CTRL);
+	val |= LPDDRC_V1_PERF_CTRL_EN;
+	writel(val, lpddrc_pmu->base + LPDDRC_PERF_CTRL);
+}
+
+static void hisi_lpddrc_pmu_v1_stop_counters(struct hisi_pmu *lpddrc_pmu)
+{
+	u32 val;
+
+	/* Clear perf_enable in LPDDRC_PERF_CTRL to stop event counting */
+	val = readl(lpddrc_pmu->base + LPDDRC_PERF_CTRL);
+	val &= ~LPDDRC_V1_PERF_CTRL_EN;
+	writel(val, lpddrc_pmu->base + LPDDRC_PERF_CTRL);
+}
+
+static void hisi_lpddrc_pmu_v1_enable_counter(struct hisi_pmu *lpddrc_pmu,
+					      struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Set counter index(event code) in LPDDRC_EVENT_CTRL register */
+	val = readl(lpddrc_pmu->base + LPDDRC_EVENT_CTRL);
+	val |= (1 << GET_LPDDRC_EVENTID(hwc));
+	writel(val, lpddrc_pmu->base + LPDDRC_EVENT_CTRL);
+}
+
+static void hisi_lpddrc_pmu_v1_disable_counter(struct hisi_pmu *lpddrc_pmu,
+					       struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index(event code) in LPDDRC_EVENT_CTRL register */
+	val = readl(lpddrc_pmu->base + LPDDRC_EVENT_CTRL);
+	val &= ~(1 << GET_LPDDRC_EVENTID(hwc));
+	writel(val, lpddrc_pmu->base + LPDDRC_EVENT_CTRL);
+}
+
+static int hisi_lpddrc_pmu_v1_get_event_idx(struct perf_event *event)
+{
+	struct hisi_pmu *lpddrc_pmu = to_hisi_pmu(event->pmu);
+	unsigned long *used_mask = lpddrc_pmu->pmu_events.used_mask;
+	struct hw_perf_event *hwc = &event->hw;
+	/* For LPDDRC PMU, we use event code as counter index */
+	int idx = GET_LPDDRC_EVENTID(hwc);
+
+	if (test_bit(idx, used_mask))
+		return -EAGAIN;
+
+	set_bit(idx, used_mask);
+
+	return idx;
+}
+
+static void hisi_lpddrc_pmu_v1_enable_counter_int(struct hisi_pmu *lpddrc_pmu,
+						  struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(lpddrc_pmu->base + LPDDRC_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, lpddrc_pmu->base + LPDDRC_INT_MASK);
+}
+
+static void hisi_lpddrc_pmu_v1_disable_counter_int(struct hisi_pmu *lpddrc_pmu,
+						   struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(lpddrc_pmu->base + LPDDRC_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, lpddrc_pmu->base + LPDDRC_INT_MASK);
+}
+
+static u32 hisi_lpddrc_pmu_v1_get_int_status(struct hisi_pmu *lpddrc_pmu)
+{
+	return readl(lpddrc_pmu->base + LPDDRC_INT_STATUS);
+}
+
+static void hisi_lpddrc_pmu_v1_clear_int_status(struct hisi_pmu *lpddrc_pmu,
+						int idx)
+{
+	writel(1 << idx, lpddrc_pmu->base + LPDDRC_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_lpddrc_pmu_acpi_match[] = {
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_lpddrc_pmu_acpi_match);
+
+static const struct of_device_id lpddrc_of_match[] = {
+	{ .compatible = "hisilicon,lpddrc-pmu", },
+	{},
+};
+
+static int hisi_lpddrc_pmu_init_data(struct platform_device *pdev,
+				     struct hisi_pmu *lpddrc_pmu)
+{
+	/*
+	 * Use the SCCL_ID and LPDDRC channel ID to identify the
+	 * LPDDRC PMU, while SCCL_ID is in MPIDR[aff2].
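+	 * The resulting PMU device is named hisi_sccl<S>_lpddrc<C>.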
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", + &lpddrc_pmu->index_id)) { + dev_err(&pdev->dev, "Can not read lpddrc channel-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &lpddrc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read lpddrc sccl-id!\n"); + return -EINVAL; + } + /* LPDDRC PMUs only share the same SCCL */ + lpddrc_pmu->ccl_id = -1; + + lpddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(lpddrc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for lpddrc_pmu resource\n"); + return PTR_ERR(lpddrc_pmu->base); + } + + lpddrc_pmu->identifier = readl(lpddrc_pmu->base + LPDDRC_VERSION); + + return 0; +} + +static struct attribute *hisi_lpddrc_pmu_v1_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-4"), + NULL, +}; + +static const struct attribute_group hisi_lpddrc_pmu_v1_format_group = { + .name = "format", + .attrs = hisi_lpddrc_pmu_v1_format_attr, +}; + +static struct attribute *hisi_lpddrc_pmu_v1_events_attr[] = { + HISI_PMU_EVENT_ATTR(flux_wr, 0x00), + HISI_PMU_EVENT_ATTR(flux_rd, 0x01), + HISI_PMU_EVENT_ATTR(flux_wcmd, 0x02), + HISI_PMU_EVENT_ATTR(flux_rcmd, 0x03), + HISI_PMU_EVENT_ATTR(pre_cmd, 0x04), + HISI_PMU_EVENT_ATTR(act_cmd, 0x05), + HISI_PMU_EVENT_ATTR(rnk_chg, 0x06), + HISI_PMU_EVENT_ATTR(rw_chg, 0x07), + NULL, +}; + +static const struct attribute_group hisi_lpddrc_pmu_v1_events_group = { + .name = "events", + .attrs = hisi_lpddrc_pmu_v1_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_lpddrc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_lpddrc_pmu_cpumask_attr_group = { + .attrs = hisi_lpddrc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_lpddrc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_lpddrc_pmu_identifier_attrs[] = { + &hisi_lpddrc_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_lpddrc_pmu_identifier_group = { + .attrs = hisi_lpddrc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_lpddrc_pmu_v1_attr_groups[] = { + &hisi_lpddrc_pmu_v1_format_group, + &hisi_lpddrc_pmu_v1_events_group, + &hisi_lpddrc_pmu_cpumask_attr_group, + &hisi_lpddrc_pmu_identifier_group, + NULL, +}; + +static const struct hisi_uncore_ops hisi_uncore_lpddrc_v1_ops = { + .write_evtype = hisi_lpddrc_pmu_write_evtype, + .get_event_idx = hisi_lpddrc_pmu_v1_get_event_idx, + .start_counters = hisi_lpddrc_pmu_v1_start_counters, + .stop_counters = hisi_lpddrc_pmu_v1_stop_counters, + .enable_counter = hisi_lpddrc_pmu_v1_enable_counter, + .disable_counter = hisi_lpddrc_pmu_v1_disable_counter, + .enable_counter_int = hisi_lpddrc_pmu_v1_enable_counter_int, + .disable_counter_int = hisi_lpddrc_pmu_v1_disable_counter_int, + .write_counter = hisi_lpddrc_pmu_v1_write_counter, + .read_counter = hisi_lpddrc_pmu_v1_read_counter, + .get_int_status = hisi_lpddrc_pmu_v1_get_int_status, + .clear_int_status = hisi_lpddrc_pmu_v1_clear_int_status, +}; + +static int hisi_lpddrc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *lpddrc_pmu) +{ + int ret; + + ret = hisi_lpddrc_pmu_init_data(pdev, lpddrc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(lpddrc_pmu, pdev); + if (ret) + return ret; + + lpddrc_pmu->counter_bits = 32; + lpddrc_pmu->check_event = LPDDRC_V1_NR_EVENTS; + lpddrc_pmu->pmu_events.attr_groups = 
		hisi_lpddrc_pmu_v1_attr_groups;
+	lpddrc_pmu->ops = &hisi_uncore_lpddrc_v1_ops;
+
+	lpddrc_pmu->num_counters = LPDDRC_NR_COUNTERS;
+	lpddrc_pmu->dev = &pdev->dev;
+	lpddrc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_lpddrc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *lpddrc_pmu;
+	char *name;
+	int ret;
+
+	lpddrc_pmu = devm_kzalloc(&pdev->dev, sizeof(*lpddrc_pmu), GFP_KERNEL);
+	if (!lpddrc_pmu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, lpddrc_pmu);
+
+	ret = hisi_lpddrc_pmu_dev_probe(pdev, lpddrc_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+			      "hisi_sccl%u_lpddrc%u", lpddrc_pmu->sccl_id,
+			      lpddrc_pmu->index_id);
+
+	lpddrc_pmu->pmu = (struct pmu) {
+		.name = name,
+		.module = THIS_MODULE,
+		.task_ctx_nr = perf_invalid_context,
+		.event_init = hisi_uncore_pmu_event_init,
+		.pmu_enable = hisi_uncore_pmu_enable,
+		.pmu_disable = hisi_uncore_pmu_disable,
+		.add = hisi_uncore_pmu_add,
+		.del = hisi_uncore_pmu_del,
+		.start = hisi_uncore_pmu_start,
+		.stop = hisi_uncore_pmu_stop,
+		.read = hisi_uncore_pmu_read,
+		.attr_groups = lpddrc_pmu->pmu_events.attr_groups,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	/* Pick one core to use for cpumask attributes */
+	cpumask_set_cpu(smp_processor_id(), &lpddrc_pmu->associated_cpus);
+
+	lpddrc_pmu->on_cpu = cpumask_first(&lpddrc_pmu->associated_cpus);
+	if (lpddrc_pmu->on_cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	ret = perf_pmu_register(&lpddrc_pmu->pmu, name, -1);
+
+	return ret;
+}
+
+static int hisi_lpddrc_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *lpddrc_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&lpddrc_pmu->pmu);
+	return 0;
+}
+
+static struct platform_driver hisi_lpddrc_pmu_driver = {
+	.driver = {
+		.name = "hisi_lpddrc_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_lpddrc_pmu_acpi_match),
+		.of_match_table = lpddrc_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_lpddrc_pmu_probe,
+	.remove = hisi_lpddrc_pmu_remove,
+};
+
+static int __init hisi_lpddrc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&hisi_lpddrc_pmu_driver);
+
+	return ret;
+}
+module_init(hisi_lpddrc_pmu_module_init);
+
+static void __exit hisi_lpddrc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_lpddrc_pmu_driver);
+}
+module_exit(hisi_lpddrc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC LPDDRC uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang ");
+MODULE_AUTHOR("Anurup M ");
--
Gitee

From 6684b9d0e722e9f7e80f37d972bbc58fe5641e36 Mon Sep 17 00:00:00 2001
From: Qi Liu
Date: Tue, 9 Aug 2022 23:06:43 +0800
Subject: [PATCH 024/173] drivers/perf: hisi: Add Support for CPA PMU

[Upstream commit 6b79738b6ed91a2d0fe958819469eeedac3bca81]

CPA (Coherency Protocol Agent) is a component in each SICL (Super IO
Cluster) which implements packet format translation, route parsing and
traffic statistics.

The CPA PMU has 8 PMU counters, and an interrupt is supported to handle
counter overflow. Let's support its driver under the framework of the
HiSilicon PMU driver.
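A usage sketch (the sysfs device name hisi_sicl<X>_cpa<Y> depends on the
platform topology):

	perf stat -e hisi_sicl0_cpa0/cpa_cycles/ -e hisi_sicl0_cpa0/cpa_p0_rd_dat/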
Signed-off-by: Qi Liu Reviewed-by: John Garry Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20220415102352.6665-3-liuqi115@huawei.com Signed-off-by: Will Deacon Signed-off-by: Wangming Shao Signed-off-by: huwentao --- drivers/perf/hisilicon/Makefile | 4 +- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 409 +++++++++++++++++++ include/linux/cpuhotplug.h | 3 + 3 files changed, 415 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 5e3b8522441b..a3522abb3975 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_l3t_pmu.o hisi_uncore_lpddrc_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o \ + hisi_uncore_l3t_pmu.o \ + hisi_uncore_lpddrc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c new file mode 100644 index 000000000000..a9bb73f76be4 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC CPA(Coherency Protocol Agent) hardware event counters support + * + * Copyright (C) 2022 HiSilicon Limited + * Author: Qi Liu + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ + +#define pr_fmt(fmt) "cpa pmu: " fmt +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* CPA register definition */ +#define CPA_PERF_CTRL 0x1c00 +#define CPA_EVENT_CTRL 0x1c04 +#define CPA_INT_MASK 0x1c70 +#define CPA_INT_STATUS 0x1c78 +#define CPA_INT_CLEAR 0x1c7c +#define CPA_EVENT_TYPE0 0x1c80 +#define CPA_VERSION 0x1cf0 +#define CPA_CNT0_LOWER 0x1d00 +#define CPA_CFG_REG 0x0534 + +/* CPA operation command */ +#define CPA_PERF_CTRL_EN BIT_ULL(0) +#define CPA_EVTYPE_MASK 0xffUL +#define CPA_PM_CTRL BIT_ULL(9) + +/* CPA has 8-counters */ +#define CPA_NR_COUNTERS 0x8 +#define CPA_COUNTER_BITS 64 +#define CPA_NR_EVENTS 0xff +#define CPA_REG_OFFSET 0x8 + +static u32 hisi_cpa_pmu_get_counter_offset(int idx) +{ + return (CPA_CNT0_LOWER + idx * CPA_REG_OFFSET); +} + +static u64 hisi_cpa_pmu_read_counter(struct hisi_pmu *cpa_pmu, + struct hw_perf_event *hwc) +{ + return readq(cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_cpa_pmu_write_counter(struct hisi_pmu *cpa_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, cpa_pmu->base + hisi_cpa_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_cpa_pmu_write_evtype(struct hisi_pmu *cpa_pmu, int idx, + u32 type) +{ + u32 reg, reg_idx, shift, val; + + /* + * Select the appropriate event select register(CPA_EVENT_TYPE0/1). + * There are 2 event select registers for the 8 hardware counters. + * Event code is 8-bits and for the former 4 hardware counters, + * CPA_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, + * CPA_EVENT_TYPE1 is chosen. 
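+ *
+ * For example, counter idx 5 selects CPA_EVENT_TYPE1 (CPA_EVENT_TYPE0 + 4)
+ * and a shift of 8 bits within that register.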
+ */ + reg = CPA_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = CPA_REG_OFFSET * reg_idx; + + /* Write event code to CPA_EVENT_TYPEx Register */ + val = readl(cpa_pmu->base + reg); + val &= ~(CPA_EVTYPE_MASK << shift); + val |= type << shift; + writel(val, cpa_pmu->base + reg); +} + +static void hisi_cpa_pmu_start_counters(struct hisi_pmu *cpa_pmu) +{ + u32 val; + + val = readl(cpa_pmu->base + CPA_PERF_CTRL); + val |= CPA_PERF_CTRL_EN; + writel(val, cpa_pmu->base + CPA_PERF_CTRL); +} + +static void hisi_cpa_pmu_stop_counters(struct hisi_pmu *cpa_pmu) +{ + u32 val; + + val = readl(cpa_pmu->base + CPA_PERF_CTRL); + val &= ~(CPA_PERF_CTRL_EN); + writel(val, cpa_pmu->base + CPA_PERF_CTRL); +} + +static void hisi_cpa_pmu_disable_pm(struct hisi_pmu *cpa_pmu) +{ + u32 val; + + val = readl(cpa_pmu->base + CPA_CFG_REG); + val |= CPA_PM_CTRL; + writel(val, cpa_pmu->base + CPA_CFG_REG); +} + +static void hisi_cpa_pmu_enable_pm(struct hisi_pmu *cpa_pmu) +{ + u32 val; + + val = readl(cpa_pmu->base + CPA_CFG_REG); + val &= ~(CPA_PM_CTRL); + writel(val, cpa_pmu->base + CPA_CFG_REG); +} + +static void hisi_cpa_pmu_enable_counter(struct hisi_pmu *cpa_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index in CPA_EVENT_CTRL register */ + val = readl(cpa_pmu->base + CPA_EVENT_CTRL); + val |= 1 << hwc->idx; + writel(val, cpa_pmu->base + CPA_EVENT_CTRL); +} + +static void hisi_cpa_pmu_disable_counter(struct hisi_pmu *cpa_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index in CPA_EVENT_CTRL register */ + val = readl(cpa_pmu->base + CPA_EVENT_CTRL); + val &= ~(1UL << hwc->idx); + writel(val, cpa_pmu->base + CPA_EVENT_CTRL); +} + +static void hisi_cpa_pmu_enable_counter_int(struct hisi_pmu *cpa_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Write 0 to enable interrupt */ + val = readl(cpa_pmu->base + CPA_INT_MASK); + val &= ~(1UL << hwc->idx); + writel(val, cpa_pmu->base + CPA_INT_MASK); +} + +static void hisi_cpa_pmu_disable_counter_int(struct hisi_pmu *cpa_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Write 1 to mask interrupt */ + val = readl(cpa_pmu->base + CPA_INT_MASK); + val |= 1 << hwc->idx; + writel(val, cpa_pmu->base + CPA_INT_MASK); +} + +static u32 hisi_cpa_pmu_get_int_status(struct hisi_pmu *cpa_pmu) +{ + return readl(cpa_pmu->base + CPA_INT_STATUS); +} + +static void hisi_cpa_pmu_clear_int_status(struct hisi_pmu *cpa_pmu, int idx) +{ + writel(1 << idx, cpa_pmu->base + CPA_INT_CLEAR); +} + +static const struct acpi_device_id hisi_cpa_pmu_acpi_match[] = { + { "HISI0281", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match); + +static int hisi_cpa_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *cpa_pmu) +{ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &cpa_pmu->sicl_id)) { + dev_err(&pdev->dev, "Can not read sicl-id\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", + &cpa_pmu->index_id)) { + dev_err(&pdev->dev, "Cannot read idx-id\n"); + return -EINVAL; + } + + cpa_pmu->ccl_id = -1; + cpa_pmu->sccl_id = -1; + cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(cpa_pmu->base)) + return PTR_ERR(cpa_pmu->base); + + cpa_pmu->identifier = readl(cpa_pmu->base + CPA_VERSION); + + return 0; +} + +static struct attribute *hisi_cpa_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-15"), + NULL +}; + +static const struct attribute_group hisi_cpa_pmu_format_group = { + .name = "format", + .attrs = hisi_cpa_pmu_format_attr, +}; 
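+
+/* Note: the hardware event field is 8 bits wide (CPA_EVTYPE_MASK). */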
+ +static struct attribute *hisi_cpa_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(cpa_cycles, 0x00), + HISI_PMU_EVENT_ATTR(cpa_p1_wr_dat, 0x61), + HISI_PMU_EVENT_ATTR(cpa_p1_rd_dat, 0x62), + HISI_PMU_EVENT_ATTR(cpa_p0_wr_dat, 0xE1), + HISI_PMU_EVENT_ATTR(cpa_p0_rd_dat, 0xE2), + NULL +}; + +static const struct attribute_group hisi_cpa_pmu_events_group = { + .name = "events", + .attrs = hisi_cpa_pmu_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = { + .attrs = hisi_cpa_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_cpa_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_cpa_pmu_identifier_attrs[] = { + &hisi_cpa_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group hisi_cpa_pmu_identifier_group = { + .attrs = hisi_cpa_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = { + &hisi_cpa_pmu_format_group, + &hisi_cpa_pmu_events_group, + &hisi_cpa_pmu_cpumask_attr_group, + &hisi_cpa_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_cpa_pmu_ops = { + .write_evtype = hisi_cpa_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_cpa_pmu_start_counters, + .stop_counters = hisi_cpa_pmu_stop_counters, + .enable_counter = hisi_cpa_pmu_enable_counter, + .disable_counter = hisi_cpa_pmu_disable_counter, + .enable_counter_int = hisi_cpa_pmu_enable_counter_int, + .disable_counter_int = hisi_cpa_pmu_disable_counter_int, + .write_counter = hisi_cpa_pmu_write_counter, + .read_counter = hisi_cpa_pmu_read_counter, + .get_int_status = hisi_cpa_pmu_get_int_status, + .clear_int_status = hisi_cpa_pmu_clear_int_status, +}; + +static int hisi_cpa_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *cpa_pmu) +{ + int ret; + + ret = hisi_cpa_pmu_init_data(pdev, cpa_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(cpa_pmu, pdev); + if (ret) + return ret; + + cpa_pmu->counter_bits = CPA_COUNTER_BITS; + cpa_pmu->check_event = CPA_NR_EVENTS; + cpa_pmu->pmu_events.attr_groups = hisi_cpa_pmu_attr_groups; + cpa_pmu->ops = &hisi_uncore_cpa_pmu_ops; + cpa_pmu->num_counters = CPA_NR_COUNTERS; + cpa_pmu->dev = &pdev->dev; + cpa_pmu->on_cpu = -1; + + return 0; +} + +static int hisi_cpa_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *cpa_pmu; + char *name; + int ret; + + cpa_pmu = devm_kzalloc(&pdev->dev, sizeof(*cpa_pmu), GFP_KERNEL); + if (!cpa_pmu) + return -ENOMEM; + + ret = hisi_cpa_pmu_dev_probe(pdev, cpa_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u", + cpa_pmu->sicl_id, cpa_pmu->index_id); + if (!name) + return -ENOMEM; + + cpa_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = cpa_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + /* Power Management should be disabled before using CPA PMU. 
*/ + hisi_cpa_pmu_disable_pm(cpa_pmu); + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, + &cpa_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + hisi_cpa_pmu_enable_pm(cpa_pmu); + return ret; + } + + ret = perf_pmu_register(&cpa_pmu->pmu, name, -1); + if (ret) { + dev_err(cpa_pmu->dev, "PMU register failed\n"); + cpuhp_state_remove_instance_nocalls( + CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, &cpa_pmu->node); + hisi_cpa_pmu_enable_pm(cpa_pmu); + return ret; + } + + platform_set_drvdata(pdev, cpa_pmu); + return ret; +} + +static int hisi_cpa_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&cpa_pmu->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, + &cpa_pmu->node); + hisi_cpa_pmu_enable_pm(cpa_pmu); + return 0; +} + +static struct platform_driver hisi_cpa_pmu_driver = { + .driver = { + .name = "hisi_cpa_pmu", + .acpi_match_table = ACPI_PTR(hisi_cpa_pmu_acpi_match), + .suppress_bind_attrs = true, + }, + .probe = hisi_cpa_pmu_probe, + .remove = hisi_cpa_pmu_remove, +}; + +static int __init hisi_cpa_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, + "AP_PERF_ARM_HISI_CPA_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("setup hotplug failed: %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_cpa_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE); + + return ret; +} +module_init(hisi_cpa_pmu_module_init); + +static void __exit hisi_cpa_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_cpa_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE); +} +module_exit(hisi_cpa_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Qi Liu "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 1b1ceb465c3d..a7024884f5b0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -164,6 +164,9 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + #ifndef __GENKSYMS__ + CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, + #endif CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, -- Gitee From fcfb20cb91252f714f371817b6d176ec86adada1 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Mon, 21 Nov 2022 22:03:10 +0800 Subject: [PATCH 025/173] drivers/perf: hisi: Add description for HNS3 PMU driver [Upstream commit 39915b6b5fc209c9262b32ec754b3838550f2a1f] HNS3 PMU End Point device is supported on HiSilicon HIP09 platform, so add document hns3-pmu.rst to provide guidance on how to use it. 
Signed-off-by: Guangbin Huang
Reviewed-by: John Garry
Reviewed-by: Shaokun Zhang
Link: https://lore.kernel.org/r/20220628063419.38514-2-huangguangbin2@huawei.com
Signed-off-by: Will Deacon
Signed-off-by: Jiantao Xiao
Signed-off-by: huwentao
---
 Documentation/admin-guide/perf/hns3-pmu.rst | 136 ++++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 Documentation/admin-guide/perf/hns3-pmu.rst

diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst
new file mode 100644
index 000000000000..578407e487d6
--- /dev/null
+++ b/Documentation/admin-guide/perf/hns3-pmu.rst
@@ -0,0 +1,136 @@
+======================================
+HNS3 Performance Monitoring Unit (PMU)
+======================================
+
+HNS3 (HiSilicon Network System 3) Performance Monitoring Unit (PMU) is an
+End Point device used to collect performance statistics of the HiSilicon
+SoC NIC. On HIP09, each SICL (Super I/O Cluster) has one PMU device.
+
+HNS3 PMU supports collection of performance statistics such as bandwidth,
+latency, packet rate and interrupt rate.
+
+Each HNS3 PMU supports 8 hardware events.
+
+HNS3 PMU driver
+===============
+
+The HNS3 PMU driver registers a perf PMU with the name of its SICL id::
+
+  /sys/devices/hns3_pmu_sicl_<sicl_id>
+
+The PMU driver provides descriptions of the available events, the filter
+modes, the formats, the identifier and the cpumask in sysfs.
+
+The "events" directory describes the event codes of all supported events
+shown in perf list.
+
+The "filtermode" directory describes the supported filter modes of each
+event.
+
+The "format" directory describes all formats of the config (events) and
+config1 (filter options) fields of the perf_event_attr structure.
+
+The "identifier" file shows the version of the PMU hardware device.
+
+The "bdf_min" and "bdf_max" files show the supported BDF range of each
+PMU device.
+
+The "hw_clk_freq" file shows the hardware clock frequency of each PMU
+device.
+
+Example usage of checking event code and subevent code::
+
+  $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
+  config=0x00204
+  $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
+  config=0x10204
+
+Each performance statistic has a pair of events whose two values are used
+to calculate the real performance data in userspace.
+
+Bits 0~15 of config (here 0x0204) are the true hardware event code. Two
+events with the same value in bits 0~15 of config form an event pair, and
+bit 16 of config selects counter 0 or counter 1 of the hardware event.
+
+After reading the two values of an event pair in userspace, the real
+performance data is calculated as::
+
+  counter 0 / counter 1
+
+For the latency pair above, for example, this quotient is the average
+number of cycles per packet; dividing it by the frequency in the
+"hw_clk_freq" file converts it to seconds.
+
+Example usage of checking supported filter mode::
+
+  $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
+  filter mode supported: global/port/port-tc/func/func-queue/
+
+Example usage of perf::
+
+  $# perf list
+  hns3_pmu_sicl_0/bw_ssu_rpu_byte_num/ [kernel PMU event]
+  hns3_pmu_sicl_0/bw_ssu_rpu_time/     [kernel PMU event]
+  ------------------------------------------
+
+  $# perf stat -g -e hns3_pmu_sicl_0/bw_ssu_rpu_byte_num,global=1/ -e hns3_pmu_sicl_0/bw_ssu_rpu_time,global=1/ -I 1000
+  or
+  $# perf stat -g -e hns3_pmu_sicl_0/config=0x00002,global=1/ -e hns3_pmu_sicl_0/config=0x10002,global=1/ -I 1000
+
+
+Filter modes
+------------
+
+1. global mode
+The PMU collects performance statistics for all HNS3 PCIe functions of the
+IO DIE. Setting the "global" filter option to 1 enables this mode.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,global=1/ -I 1000
+
+2. port mode
+The PMU collects performance statistics of one whole physical port. The
+port id is the same as the MAC id. The "tc" filter option must be set to
+0xF in this mode; here tc stands for traffic class.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0xF/ -I 1000
+
+3. port-tc mode
+The PMU collects performance statistics of one tc of a physical port. The
+port id is the same as the MAC id. The "tc" filter option must be set to
+0 ~ 7 in this mode.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,port=0,tc=0/ -I 1000
+
+4. func mode
+The PMU collects performance statistics of one PF/VF. The function id is
+the BDF of the PF/VF, obtained with the conversion formula::
+
+  func = (bus << 8) + (device << 3) + (function)
+
+for example:
+  BDF      func
+  35:00.0  0x3500
+  35:00.1  0x3501
+  35:01.0  0x3508
+
+In this mode, the "queue" filter option must be set to 0xFFFF.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0xFFFF/ -I 1000
+
+5. func-queue mode
+The PMU collects performance statistics of one queue of a PF/VF. The
+function id is the BDF of the PF/VF, and the "queue" filter option must
+be set to the exact queue id of the function.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x1020F,bdf=0x3500,queue=0/ -I 1000
+
+6. func-intr mode
+The PMU collects performance statistics of one interrupt of a PF/VF. The
+function id is the BDF of the PF/VF, and the "intr" filter option must be
+set to the exact interrupt id of the function.
+
+Example usage of perf::
+
+  $# perf stat -a -e hns3_pmu_sicl_0/config=0x00301,bdf=0x3500,intr=0/ -I 1000
--
Gitee

From 7d1e71bc072ae4c895bc33468abef55369470d9d Mon Sep 17 00:00:00 2001
From: Guangbin Huang
Date: Mon, 21 Nov 2022 22:03:11 +0800
Subject: [PATCH 026/173] drivers/perf: hisi: add driver for HNS3 PMU

[Upstream commit 66637ab137b44914356a9dc7a9b3f8ebcf0b0695]

HNS3 (HiSilicon Network System 3) PMU is an RCiEP device in the HiSilicon
SoC NIC; it supports collection of performance statistics such as
bandwidth, latency, packet rate and interrupt rate. The NIC of each SICL
has one PMU device.

The driver registers each PMU device with perf, and exports information
about the supported events, the filter mode of each event, the BDF range,
the hardware clock frequency, the identifier and so on via sysfs.

Each PMU device has its own registers for control, counters and
interrupt, and it supports 8 hardware events; each hardware event has its
own registers for configuration, counters and interrupt.
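To illustrate how one such event pair is combined in userspace (a sketch
following the event description above, where byte_num and time stand for
the values read from the counter and extended counter of one bandwidth
event):

  bandwidth = byte_num / (time / hw_clk_freq)

with hw_clk_freq being the hardware clock frequency exported in sysfs.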
Filter options contain:

  config - select event
  port   - select physical port of the NIC
  tc     - select tc (must be used with port)
  func   - select PF/VF
  queue  - select queue of PF/VF (must be used with func)
  intr   - select interrupt number (must be used with func)
  global - select all functions of the IO DIE

Signed-off-by: Guangbin Huang
Reviewed-by: John Garry
Reviewed-by: Shaokun Zhang
Link: https://lore.kernel.org/r/20220628063419.38514-3-huangguangbin2@huawei.com
Signed-off-by: Will Deacon
Signed-off-by: Jiantao Xiao
Signed-off-by: huwentao
---
 MAINTAINERS                       |    6 +
 drivers/perf/hisilicon/Kconfig    |   10 +
 drivers/perf/hisilicon/Makefile   |    1 +
 drivers/perf/hisilicon/hns3_pmu.c | 1672 +++++++++++++++++++++++++++++
 4 files changed, 1689 insertions(+)
 create mode 100644 drivers/perf/hisilicon/hns3_pmu.c

diff --git a/MAINTAINERS b/MAINTAINERS
index a162f8446651..e49f89bbcf20 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7483,6 +7483,12 @@ W: http://www.hisilicon.com
 S: Maintained
 F: drivers/spi/spi-hisi-sfc-v3xx.c

+HISILICON HNS3 PMU DRIVER
+M: Guangbin Huang
+S: Supported
+F: Documentation/admin-guide/perf/hns3-pmu.rst
+F: drivers/perf/hisilicon/hns3_pmu.c
+
 HISILICON QM AND ZIP Controller DRIVER
 M: Zhou Wang
 L: linux-crypto@vger.kernel.org
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig
index 5546218b5598..171bfc1b6bc2 100644
--- a/drivers/perf/hisilicon/Kconfig
+++ b/drivers/perf/hisilicon/Kconfig
@@ -14,3 +14,13 @@ config HISI_PCIE_PMU
 	  RCiEP devices. Adds the PCIe PMU into perf events system for
 	  monitoring latency, bandwidth etc.
+
+config HNS3_PMU
+	tristate "HNS3 PERF PMU"
+	depends on ARM64 || COMPILE_TEST
+	depends on PCI
+	help
+	  Provide support for HNS3 performance monitoring unit (PMU) RCiEP
+	  devices.
+	  Adds the HNS3 PMU into perf events system for monitoring latency,
+	  bandwidth etc.
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index a3522abb3975..a35705795dfc 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
 	hisi_uncore_lpddrc_pmu.o
 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
+obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
new file mode 100644
index 000000000000..b9d201ff2e63
--- /dev/null
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -0,0 +1,1672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This driver adds support for the HNS3 PMU RCiEP device. Related perf
+ * events are bandwidth, latency, packet rate, interrupt rate etc.
+ *
+ * Copyright (C) 2022 HiSilicon Limited
+ */
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-epf.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+/* Dynamic CPU hotplug state used by HNS3 PMU */
+static enum cpuhp_state hns3_pmu_online;
+
+/* registers offset address */
+#define HNS3_PMU_REG_GLOBAL_CTRL		0x0000
+#define HNS3_PMU_REG_CLOCK_FREQ			0x0020
+#define HNS3_PMU_REG_BDF			0x0fe0
+#define HNS3_PMU_REG_VERSION			0x0fe4
+#define HNS3_PMU_REG_DEVICE_ID			0x0fe8
+
+#define HNS3_PMU_REG_EVENT_OFFSET		0x1000
+#define HNS3_PMU_REG_EVENT_SIZE			0x1000
+#define HNS3_PMU_REG_EVENT_CTRL_LOW		0x00
+#define HNS3_PMU_REG_EVENT_CTRL_HIGH		0x04
+#define HNS3_PMU_REG_EVENT_INTR_STATUS		0x08
+#define HNS3_PMU_REG_EVENT_INTR_MASK		0x0c
+#define HNS3_PMU_REG_EVENT_COUNTER		0x10
+#define HNS3_PMU_REG_EVENT_EXT_COUNTER		0x18
+#define HNS3_PMU_REG_EVENT_QID_CTRL		0x28
+#define HNS3_PMU_REG_EVENT_QID_PARA		0x2c
+
+#define HNS3_PMU_FILTER_SUPPORT_GLOBAL		BIT(0)
+#define HNS3_PMU_FILTER_SUPPORT_PORT		BIT(1)
+#define HNS3_PMU_FILTER_SUPPORT_PORT_TC		BIT(2)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC		BIT(3)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE	BIT(4)
+#define HNS3_PMU_FILTER_SUPPORT_FUNC_INTR	BIT(5)
+
+#define HNS3_PMU_FILTER_ALL_TC			0xf
+#define HNS3_PMU_FILTER_ALL_QUEUE		0xffff
+
+#define HNS3_PMU_CTRL_SUBEVENT_S		4
+#define HNS3_PMU_CTRL_FILTER_MODE_S		24
+
+#define HNS3_PMU_GLOBAL_START			BIT(0)
+
+#define HNS3_PMU_EVENT_STATUS_RESET		BIT(11)
+#define HNS3_PMU_EVENT_EN			BIT(12)
+#define HNS3_PMU_EVENT_OVERFLOW_RESTART		BIT(15)
+
+#define HNS3_PMU_QID_PARA_FUNC_S		0
+#define HNS3_PMU_QID_PARA_QUEUE_S		16
+
+#define HNS3_PMU_QID_CTRL_REQ_ENABLE		BIT(0)
+#define HNS3_PMU_QID_CTRL_DONE			BIT(1)
+#define HNS3_PMU_QID_CTRL_MISS			BIT(2)
+
+#define HNS3_PMU_INTR_MASK_OVERFLOW		BIT(1)
+
+#define HNS3_PMU_MAX_HW_EVENTS			8
+
+/*
+ * Each hardware event contains two registers (counter and ext_counter) for
+ * bandwidth, packet rate, latency and interrupt rate. The two registers are
+ * started together when a hardware event is enabled. The meaning of counter
+ * and ext_counter differs per event type, as follows:
+ *
+ * +----------------+------------------+---------------+
+ * | event type     | counter          | ext_counter   |
+ * +----------------+------------------+---------------+
+ * | bandwidth      | byte number      | cycle number  |
+ * +----------------+------------------+---------------+
+ * | packet rate    | packet number    | cycle number  |
+ * +----------------+------------------+---------------+
+ * | latency        | cycle number     | packet number |
+ * +----------------+------------------+---------------+
+ * | interrupt rate | interrupt number | cycle number  |
+ * +----------------+------------------+---------------+
+ *
+ * The cycle number is the increment of a hardware timer; the frequency of
+ * that timer can be read from the hw_clk_freq file.
+ *
+ * Performance of each hardware event is calculated by: counter / ext_counter.
+ *
+ * Since processing of the data is preferred to be done in userspace, we
+ * expose ext_counter as a separate event for userspace and use bit 16 to
+ * indicate it. For example, events 0x00001 and 0x10001 are one event for the
+ * hardware because bits 0-15 are the same. If bit 16 of an event is 0, the
+ * counter register is read; otherwise the ext_counter register is read.
+ */ +/* bandwidth events */ +#define HNS3_PMU_EVT_BW_SSU_EGU_BYTE_NUM 0x00001 +#define HNS3_PMU_EVT_BW_SSU_EGU_TIME 0x10001 +#define HNS3_PMU_EVT_BW_SSU_RPU_BYTE_NUM 0x00002 +#define HNS3_PMU_EVT_BW_SSU_RPU_TIME 0x10002 +#define HNS3_PMU_EVT_BW_SSU_ROCE_BYTE_NUM 0x00003 +#define HNS3_PMU_EVT_BW_SSU_ROCE_TIME 0x10003 +#define HNS3_PMU_EVT_BW_ROCE_SSU_BYTE_NUM 0x00004 +#define HNS3_PMU_EVT_BW_ROCE_SSU_TIME 0x10004 +#define HNS3_PMU_EVT_BW_TPU_SSU_BYTE_NUM 0x00005 +#define HNS3_PMU_EVT_BW_TPU_SSU_TIME 0x10005 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_BYTE_NUM 0x00006 +#define HNS3_PMU_EVT_BW_RPU_RCBRX_TIME 0x10006 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_BYTE_NUM 0x00008 +#define HNS3_PMU_EVT_BW_RCBTX_TXSCH_TIME 0x10008 +#define HNS3_PMU_EVT_BW_WR_FBD_BYTE_NUM 0x00009 +#define HNS3_PMU_EVT_BW_WR_FBD_TIME 0x10009 +#define HNS3_PMU_EVT_BW_WR_EBD_BYTE_NUM 0x0000a +#define HNS3_PMU_EVT_BW_WR_EBD_TIME 0x1000a +#define HNS3_PMU_EVT_BW_RD_FBD_BYTE_NUM 0x0000b +#define HNS3_PMU_EVT_BW_RD_FBD_TIME 0x1000b +#define HNS3_PMU_EVT_BW_RD_EBD_BYTE_NUM 0x0000c +#define HNS3_PMU_EVT_BW_RD_EBD_TIME 0x1000c +#define HNS3_PMU_EVT_BW_RD_PAY_M0_BYTE_NUM 0x0000d +#define HNS3_PMU_EVT_BW_RD_PAY_M0_TIME 0x1000d +#define HNS3_PMU_EVT_BW_RD_PAY_M1_BYTE_NUM 0x0000e +#define HNS3_PMU_EVT_BW_RD_PAY_M1_TIME 0x1000e +#define HNS3_PMU_EVT_BW_WR_PAY_M0_BYTE_NUM 0x0000f +#define HNS3_PMU_EVT_BW_WR_PAY_M0_TIME 0x1000f +#define HNS3_PMU_EVT_BW_WR_PAY_M1_BYTE_NUM 0x00010 +#define HNS3_PMU_EVT_BW_WR_PAY_M1_TIME 0x10010 + +/* packet rate events */ +#define HNS3_PMU_EVT_PPS_IGU_SSU_PACKET_NUM 0x00100 +#define HNS3_PMU_EVT_PPS_IGU_SSU_TIME 0x10100 +#define HNS3_PMU_EVT_PPS_SSU_EGU_PACKET_NUM 0x00101 +#define HNS3_PMU_EVT_PPS_SSU_EGU_TIME 0x10101 +#define HNS3_PMU_EVT_PPS_SSU_RPU_PACKET_NUM 0x00102 +#define HNS3_PMU_EVT_PPS_SSU_RPU_TIME 0x10102 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_PACKET_NUM 0x00103 +#define HNS3_PMU_EVT_PPS_SSU_ROCE_TIME 0x10103 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_PACKET_NUM 0x00104 +#define HNS3_PMU_EVT_PPS_ROCE_SSU_TIME 0x10104 +#define HNS3_PMU_EVT_PPS_TPU_SSU_PACKET_NUM 0x00105 +#define HNS3_PMU_EVT_PPS_TPU_SSU_TIME 0x10105 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_PACKET_NUM 0x00106 +#define HNS3_PMU_EVT_PPS_RPU_RCBRX_TIME 0x10106 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_PACKET_NUM 0x00107 +#define HNS3_PMU_EVT_PPS_RCBTX_TPU_TIME 0x10107 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_PACKET_NUM 0x00108 +#define HNS3_PMU_EVT_PPS_RCBTX_TXSCH_TIME 0x10108 +#define HNS3_PMU_EVT_PPS_WR_FBD_PACKET_NUM 0x00109 +#define HNS3_PMU_EVT_PPS_WR_FBD_TIME 0x10109 +#define HNS3_PMU_EVT_PPS_WR_EBD_PACKET_NUM 0x0010a +#define HNS3_PMU_EVT_PPS_WR_EBD_TIME 0x1010a +#define HNS3_PMU_EVT_PPS_RD_FBD_PACKET_NUM 0x0010b +#define HNS3_PMU_EVT_PPS_RD_FBD_TIME 0x1010b +#define HNS3_PMU_EVT_PPS_RD_EBD_PACKET_NUM 0x0010c +#define HNS3_PMU_EVT_PPS_RD_EBD_TIME 0x1010c +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_PACKET_NUM 0x0010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M0_TIME 0x1010d +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_PACKET_NUM 0x0010e +#define HNS3_PMU_EVT_PPS_RD_PAY_M1_TIME 0x1010e +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_PACKET_NUM 0x0010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M0_TIME 0x1010f +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_PACKET_NUM 0x00110 +#define HNS3_PMU_EVT_PPS_WR_PAY_M1_TIME 0x10110 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_PACKET_NUM 0x00111 +#define HNS3_PMU_EVT_PPS_NICROH_TX_PRE_TIME 0x10111 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_PACKET_NUM 0x00112 +#define HNS3_PMU_EVT_PPS_NICROH_RX_PRE_TIME 0x10112 + +/* latency events */ +#define HNS3_PMU_EVT_DLY_TX_PUSH_TIME 0x00202 +#define 
HNS3_PMU_EVT_DLY_TX_PUSH_PACKET_NUM 0x10202 +#define HNS3_PMU_EVT_DLY_TX_TIME 0x00204 +#define HNS3_PMU_EVT_DLY_TX_PACKET_NUM 0x10204 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_TIME 0x00206 +#define HNS3_PMU_EVT_DLY_SSU_TX_NIC_PACKET_NUM 0x10206 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_TIME 0x00207 +#define HNS3_PMU_EVT_DLY_SSU_TX_ROCE_PACKET_NUM 0x10207 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_TIME 0x00208 +#define HNS3_PMU_EVT_DLY_SSU_RX_NIC_PACKET_NUM 0x10208 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_TIME 0x00209 +#define HNS3_PMU_EVT_DLY_SSU_RX_ROCE_PACKET_NUM 0x10209 +#define HNS3_PMU_EVT_DLY_RPU_TIME 0x0020e +#define HNS3_PMU_EVT_DLY_RPU_PACKET_NUM 0x1020e +#define HNS3_PMU_EVT_DLY_TPU_TIME 0x0020f +#define HNS3_PMU_EVT_DLY_TPU_PACKET_NUM 0x1020f +#define HNS3_PMU_EVT_DLY_RPE_TIME 0x00210 +#define HNS3_PMU_EVT_DLY_RPE_PACKET_NUM 0x10210 +#define HNS3_PMU_EVT_DLY_TPE_TIME 0x00211 +#define HNS3_PMU_EVT_DLY_TPE_PACKET_NUM 0x10211 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_TIME 0x00212 +#define HNS3_PMU_EVT_DLY_TPE_PUSH_PACKET_NUM 0x10212 +#define HNS3_PMU_EVT_DLY_WR_FBD_TIME 0x00213 +#define HNS3_PMU_EVT_DLY_WR_FBD_PACKET_NUM 0x10213 +#define HNS3_PMU_EVT_DLY_WR_EBD_TIME 0x00214 +#define HNS3_PMU_EVT_DLY_WR_EBD_PACKET_NUM 0x10214 +#define HNS3_PMU_EVT_DLY_RD_FBD_TIME 0x00215 +#define HNS3_PMU_EVT_DLY_RD_FBD_PACKET_NUM 0x10215 +#define HNS3_PMU_EVT_DLY_RD_EBD_TIME 0x00216 +#define HNS3_PMU_EVT_DLY_RD_EBD_PACKET_NUM 0x10216 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_TIME 0x00217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M0_PACKET_NUM 0x10217 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_TIME 0x00218 +#define HNS3_PMU_EVT_DLY_RD_PAY_M1_PACKET_NUM 0x10218 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_TIME 0x00219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M0_PACKET_NUM 0x10219 +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_TIME 0x0021a +#define HNS3_PMU_EVT_DLY_WR_PAY_M1_PACKET_NUM 0x1021a +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_TIME 0x0021c +#define HNS3_PMU_EVT_DLY_MSIX_WRITE_PACKET_NUM 0x1021c + +/* interrupt rate events */ +#define HNS3_PMU_EVT_PPS_MSIX_NIC_INTR_NUM 0x00300 +#define HNS3_PMU_EVT_PPS_MSIX_NIC_TIME 0x10300 + +/* filter mode supported by each bandwidth event */ +#define HNS3_PMU_FILTER_BW_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_BW_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_BW_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_BW_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_BW_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_BW_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_BW_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_BW_WR_FBD 0x1b +#define HNS3_PMU_FILTER_BW_WR_EBD 0x11 +#define HNS3_PMU_FILTER_BW_RD_FBD 0x01 +#define HNS3_PMU_FILTER_BW_RD_EBD 0x1b +#define HNS3_PMU_FILTER_BW_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_BW_WR_PAY_M1 0x01 + +/* filter mode supported by each packet rate event */ +#define HNS3_PMU_FILTER_PPS_IGU_SSU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_EGU 0x07 +#define HNS3_PMU_FILTER_PPS_SSU_RPU 0x1f +#define HNS3_PMU_FILTER_PPS_SSU_ROCE 0x0f +#define HNS3_PMU_FILTER_PPS_ROCE_SSU 0x0f +#define HNS3_PMU_FILTER_PPS_TPU_SSU 0x1f +#define HNS3_PMU_FILTER_PPS_RPU_RCBRX 0x11 +#define HNS3_PMU_FILTER_PPS_RCBTX_TPU 0x1f +#define HNS3_PMU_FILTER_PPS_RCBTX_TXSCH 0x11 +#define HNS3_PMU_FILTER_PPS_WR_FBD 0x1b +#define HNS3_PMU_FILTER_PPS_WR_EBD 0x11 +#define HNS3_PMU_FILTER_PPS_RD_FBD 0x01 +#define HNS3_PMU_FILTER_PPS_RD_EBD 0x1b +#define HNS3_PMU_FILTER_PPS_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_PPS_WR_PAY_M1 0x01 +#define 
HNS3_PMU_FILTER_PPS_NICROH_TX_PRE 0x01 +#define HNS3_PMU_FILTER_PPS_NICROH_RX_PRE 0x01 + +/* filter mode supported by each latency event */ +#define HNS3_PMU_FILTER_DLY_TX_PUSH 0x01 +#define HNS3_PMU_FILTER_DLY_TX 0x01 +#define HNS3_PMU_FILTER_DLY_SSU_TX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_TX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_NIC 0x07 +#define HNS3_PMU_FILTER_DLY_SSU_RX_ROCE 0x07 +#define HNS3_PMU_FILTER_DLY_RPU 0x11 +#define HNS3_PMU_FILTER_DLY_TPU 0x1f +#define HNS3_PMU_FILTER_DLY_RPE 0x01 +#define HNS3_PMU_FILTER_DLY_TPE 0x0b +#define HNS3_PMU_FILTER_DLY_TPE_PUSH 0x1b +#define HNS3_PMU_FILTER_DLY_WR_FBD 0x1b +#define HNS3_PMU_FILTER_DLY_WR_EBD 0x11 +#define HNS3_PMU_FILTER_DLY_RD_FBD 0x01 +#define HNS3_PMU_FILTER_DLY_RD_EBD 0x1b +#define HNS3_PMU_FILTER_DLY_RD_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_RD_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M0 0x01 +#define HNS3_PMU_FILTER_DLY_WR_PAY_M1 0x01 +#define HNS3_PMU_FILTER_DLY_MSIX_WRITE 0x01 + +/* filter mode supported by each interrupt rate event */ +#define HNS3_PMU_FILTER_INTR_MSIX_NIC 0x01 + +enum hns3_pmu_hw_filter_mode { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +struct hns3_pmu_event_attr { + u32 event; + u16 filter_support; +}; + +struct hns3_pmu { + struct perf_event *hw_events[HNS3_PMU_MAX_HW_EVENTS]; + struct hlist_node node; + struct pci_dev *pdev; + struct pmu pmu; + void __iomem *base; + int irq; + int on_cpu; + u32 identifier; + u32 hw_clk_freq; /* hardware clock frequency of PMU */ + /* maximum and minimum bdf allowed by PMU */ + u16 bdf_min; + u16 bdf_max; +}; + +#define to_hns3_pmu(p) (container_of((p), struct hns3_pmu, pmu)) + +#define GET_PCI_DEVFN(bdf) ((bdf) & 0xff) + +#define FILTER_CONDITION_PORT(port) ((1 << (port)) & 0xff) +#define FILTER_CONDITION_PORT_TC(port, tc) (((port) << 3) | ((tc) & 0x07)) +#define FILTER_CONDITION_FUNC_INTR(func, intr) (((intr) << 8) | (func)) + +#define HNS3_PMU_FILTER_ATTR(_name, _config, _start, _end) \ + static inline u64 hns3_pmu_get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } + +HNS3_PMU_FILTER_ATTR(subevent, config, 0, 7); +HNS3_PMU_FILTER_ATTR(event_type, config, 8, 15); +HNS3_PMU_FILTER_ATTR(ext_counter_used, config, 16, 16); +HNS3_PMU_FILTER_ATTR(port, config1, 0, 3); +HNS3_PMU_FILTER_ATTR(tc, config1, 4, 7); +HNS3_PMU_FILTER_ATTR(bdf, config1, 8, 23); +HNS3_PMU_FILTER_ATTR(queue, config1, 24, 39); +HNS3_PMU_FILTER_ATTR(intr, config1, 40, 51); +HNS3_PMU_FILTER_ATTR(global, config1, 52, 52); + +#define HNS3_BW_EVT_BYTE_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_BYTE_NUM, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_BW_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_BW_##_name##_TIME, \ + HNS3_PMU_FILTER_BW_##_name}) +#define HNS3_PPS_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_PPS_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_PPS_##_name}) +#define HNS3_DLY_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_TIME, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define HNS3_DLY_EVT_PACKET_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_DLY_##_name##_PACKET_NUM, \ + HNS3_PMU_FILTER_DLY_##_name}) +#define 
HNS3_INTR_EVT_INTR_NUM(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_INTR_NUM, \ + HNS3_PMU_FILTER_INTR_##_name}) +#define HNS3_INTR_EVT_TIME(_name) (&(struct hns3_pmu_event_attr) {\ + HNS3_PMU_EVT_PPS_##_name##_TIME, \ + HNS3_PMU_FILTER_INTR_##_name}) + +static ssize_t hns3_pmu_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + + return sysfs_emit(buf, "%s\n", (char *)eattr->var); +} + +static ssize_t hns3_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + return sysfs_emit(buf, "config=0x%x\n", event->event); +} + +static ssize_t hns3_pmu_filter_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu_event_attr *event; + struct dev_ext_attribute *eattr; + int len; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + len = sysfs_emit_at(buf, 0, "filter mode supported: "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL) + len += sysfs_emit_at(buf, len, "global "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT) + len += sysfs_emit_at(buf, len, "port "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC) + len += sysfs_emit_at(buf, len, "port-tc "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC) + len += sysfs_emit_at(buf, len, "func "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE) + len += sysfs_emit_at(buf, len, "func-queue "); + if (event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR) + len += sysfs_emit_at(buf, len, "func-intr "); + + len += sysfs_emit_at(buf, len, "\n"); + + return len; +} + +#define HNS3_PMU_ATTR(_name, _func, _config) \ + (&((struct dev_ext_attribute[]) { \ + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ + })[0].attr.attr) + +#define HNS3_PMU_FORMAT_ATTR(_name, _format) \ + HNS3_PMU_ATTR(_name, hns3_pmu_format_show, (void *)_format) +#define HNS3_PMU_EVENT_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_event_show, (void *)_event) +#define HNS3_PMU_FLT_MODE_ATTR(_name, _event) \ + HNS3_PMU_ATTR(_name, hns3_pmu_filter_mode_show, (void *)_event) + +#define HNS3_PMU_BW_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_EVT_PAIR(_name, _macro) \ + HNS3_PMU_EVENT_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_EVENT_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +#define HNS3_PMU_BW_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_byte_num, HNS3_BW_EVT_BYTE_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_BW_EVT_TIME(_macro)) +#define HNS3_PMU_PPS_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_PPS_EVT_PACKET_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, 
HNS3_PPS_EVT_TIME(_macro)) +#define HNS3_PMU_DLY_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_DLY_EVT_TIME(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_packet_num, HNS3_DLY_EVT_PACKET_NUM(_macro)) +#define HNS3_PMU_INTR_FLT_MODE_PAIR(_name, _macro) \ + HNS3_PMU_FLT_MODE_ATTR(_name##_intr_num, HNS3_INTR_EVT_INTR_NUM(_macro)), \ + HNS3_PMU_FLT_MODE_ATTR(_name##_time, HNS3_INTR_EVT_TIME(_macro)) + +static u8 hns3_pmu_hw_filter_modes[] = { + HNS3_PMU_HW_FILTER_GLOBAL, + HNS3_PMU_HW_FILTER_PORT, + HNS3_PMU_HW_FILTER_PORT_TC, + HNS3_PMU_HW_FILTER_FUNC, + HNS3_PMU_HW_FILTER_FUNC_QUEUE, + HNS3_PMU_HW_FILTER_FUNC_INTR, +}; + +#define HNS3_PMU_SET_HW_FILTER(_hwc, _mode) \ + ((_hwc)->addr_filters = (void *)&hns3_pmu_hw_filter_modes[(_mode)]) + +static ssize_t identifier_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "0x%x\n", hns3_pmu->identifier); +} +static DEVICE_ATTR_RO(identifier); + +static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%d\n", hns3_pmu->on_cpu); +} +static DEVICE_ATTR_RO(cpumask); + +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_min; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + u16 bdf = hns3_pmu->bdf_max; + + return sysfs_emit(buf, "%02x:%02x.%x\n", PCI_BUS_NUM(bdf), + PCI_SLOT(bdf), PCI_FUNC(bdf)); +} +static DEVICE_ATTR_RO(bdf_max); + +static ssize_t hw_clk_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%u\n", hns3_pmu->hw_clk_freq); +} +static DEVICE_ATTR_RO(hw_clk_freq); + +static struct attribute *hns3_pmu_events_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_EVT_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_EVT_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_EVT_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_EVT_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_EVT_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_EVT_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_EVT_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_EVT_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_EVT_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_EVT_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_EVT_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_EVT_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_ebd, WR_EBD), + 
HNS3_PMU_PPS_EVT_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_EVT_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_EVT_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_EVT_PAIR(dly_tx_push_to_mac, TX_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_EVT_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_EVT_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_EVT_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_EVT_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_EVT_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_EVT_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_EVT_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_EVT_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute *hns3_pmu_filter_mode_attr[] = { + /* bandwidth events */ + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_egu, SSU_EGU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_rpu, SSU_RPU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_ssu_roce, SSU_ROCE), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_roce_ssu, ROCE_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_tpu_ssu, TPU_SSU), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_fbd, WR_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_ebd, WR_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_fbd, RD_FBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_ebd, RD_EBD), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_BW_FLT_MODE_PAIR(bw_wr_pay_m1, WR_PAY_M1), + + /* packet rate events */ + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_igu_ssu, IGU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_egu, SSU_EGU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_rpu, SSU_RPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_ssu_roce, SSU_ROCE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_roce_ssu, ROCE_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_tpu_ssu, TPU_SSU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rpu_rcbrx, RPU_RCBRX), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_tpu, RCBTX_TPU), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rcbtx_txsch, RCBTX_TXSCH), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_fbd, WR_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_ebd, WR_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_fbd, RD_FBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_ebd, RD_EBD), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_tx_pre, NICROH_TX_PRE), + HNS3_PMU_PPS_FLT_MODE_PAIR(pps_intr_nicroh_rx_pre, NICROH_RX_PRE), + + /* latency events */ + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_push_to_mac, TX_PUSH), + 
HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tx_normal_to_mac, TX), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_nic, SSU_TX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_tx_th_roce, SSU_TX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_nic, SSU_RX_NIC), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_ssu_rx_th_roce, SSU_RX_ROCE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpu, RPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpu, TPU), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rpe, RPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_normal, TPE), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_tpe_push, TPE_PUSH), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_fbd, WR_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_ebd, WR_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_fbd, RD_FBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_ebd, RD_EBD), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m0, RD_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_rd_pay_m1, RD_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m0, WR_PAY_M0), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_wr_pay_m1, WR_PAY_M1), + HNS3_PMU_DLY_FLT_MODE_PAIR(dly_msix_write, MSIX_WRITE), + + /* interrupt rate events */ + HNS3_PMU_INTR_FLT_MODE_PAIR(pps_intr_msix_nic, MSIX_NIC), + + NULL +}; + +static struct attribute_group hns3_pmu_events_group = { + .name = "events", + .attrs = hns3_pmu_events_attr, +}; + +static struct attribute_group hns3_pmu_filter_mode_group = { + .name = "filtermode", + .attrs = hns3_pmu_filter_mode_attr, +}; + +static struct attribute *hns3_pmu_format_attr[] = { + HNS3_PMU_FORMAT_ATTR(subevent, "config:0-7"), + HNS3_PMU_FORMAT_ATTR(event_type, "config:8-15"), + HNS3_PMU_FORMAT_ATTR(ext_counter_used, "config:16"), + HNS3_PMU_FORMAT_ATTR(port, "config1:0-3"), + HNS3_PMU_FORMAT_ATTR(tc, "config1:4-7"), + HNS3_PMU_FORMAT_ATTR(bdf, "config1:8-23"), + HNS3_PMU_FORMAT_ATTR(queue, "config1:24-39"), + HNS3_PMU_FORMAT_ATTR(intr, "config1:40-51"), + HNS3_PMU_FORMAT_ATTR(global, "config1:52"), + NULL +}; + +static struct attribute_group hns3_pmu_format_group = { + .name = "format", + .attrs = hns3_pmu_format_attr, +}; + +static struct attribute *hns3_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group hns3_pmu_cpumask_attr_group = { + .attrs = hns3_pmu_cpumask_attrs, +}; + +static struct attribute *hns3_pmu_identifier_attrs[] = { + &dev_attr_identifier.attr, + NULL +}; + +static struct attribute_group hns3_pmu_identifier_attr_group = { + .attrs = hns3_pmu_identifier_attrs, +}; + +static struct attribute *hns3_pmu_bdf_range_attrs[] = { + &dev_attr_bdf_min.attr, + &dev_attr_bdf_max.attr, + NULL +}; + +static struct attribute_group hns3_pmu_bdf_range_attr_group = { + .attrs = hns3_pmu_bdf_range_attrs, +}; + +static struct attribute *hns3_pmu_hw_clk_freq_attrs[] = { + &dev_attr_hw_clk_freq.attr, + NULL +}; + +static struct attribute_group hns3_pmu_hw_clk_freq_attr_group = { + .attrs = hns3_pmu_hw_clk_freq_attrs, +}; + +static const struct attribute_group *hns3_pmu_attr_groups[] = { + &hns3_pmu_events_group, + &hns3_pmu_filter_mode_group, + &hns3_pmu_format_group, + &hns3_pmu_cpumask_attr_group, + &hns3_pmu_identifier_attr_group, + &hns3_pmu_bdf_range_attr_group, + &hns3_pmu_hw_clk_freq_attr_group, + NULL +}; + +static u32 hns3_pmu_get_event(struct perf_event *event) +{ + return hns3_pmu_get_ext_counter_used(event) << 16 | + hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_real_event(struct perf_event *event) +{ + return hns3_pmu_get_event_type(event) << 8 | + hns3_pmu_get_subevent(event); +} + +static u32 hns3_pmu_get_offset(u32 offset, u32 idx) +{ + return offset + 
HNS3_PMU_REG_EVENT_OFFSET +
+	       HNS3_PMU_REG_EVENT_SIZE * idx;
+}
+
+static u32 hns3_pmu_readl(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	return readl(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writel(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+			    u32 val)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	writel(val, hns3_pmu->base + offset);
+}
+
+static u64 hns3_pmu_readq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	return readq(hns3_pmu->base + offset);
+}
+
+static void hns3_pmu_writeq(struct hns3_pmu *hns3_pmu, u32 reg_offset, u32 idx,
+			    u64 val)
+{
+	u32 offset = hns3_pmu_get_offset(reg_offset, idx);
+
+	writeq(val, hns3_pmu->base + offset);
+}
+
+static bool hns3_pmu_cmp_event(struct perf_event *target,
+			       struct perf_event *event)
+{
+	return hns3_pmu_get_real_event(target) == hns3_pmu_get_real_event(event);
+}
+
+static int hns3_pmu_find_related_event_idx(struct hns3_pmu *hns3_pmu,
+					   struct perf_event *event)
+{
+	struct perf_event *sibling;
+	int hw_event_used = 0;
+	int idx;
+
+	for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+		sibling = hns3_pmu->hw_events[idx];
+		if (!sibling)
+			continue;
+
+		hw_event_used++;
+
+		if (!hns3_pmu_cmp_event(sibling, event))
+			continue;
+
+		/* Related events are only shared within the same group */
+		if (sibling->group_leader == event->group_leader)
+			return idx;
+	}
+
+	/* No related event, and all hardware events are used up */
+	if (hw_event_used >= HNS3_PMU_MAX_HW_EVENTS)
+		return -EBUSY;
+
+	/* No related event, but spare hardware events are available */
+	return -ENOENT;
+}
+
+static int hns3_pmu_get_event_idx(struct hns3_pmu *hns3_pmu)
+{
+	int idx;
+
+	for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) {
+		if (!hns3_pmu->hw_events[idx])
+			return idx;
+	}
+
+	return -EBUSY;
+}
+
+static bool hns3_pmu_valid_bdf(struct hns3_pmu *hns3_pmu, u16 bdf)
+{
+	struct pci_dev *pdev;
+
+	if (bdf < hns3_pmu->bdf_min || bdf > hns3_pmu->bdf_max) {
+		pci_err(hns3_pmu->pdev, "Invalid EP device: %#x!\n", bdf);
+		return false;
+	}
+
+	pdev = pci_get_domain_bus_and_slot(pci_domain_nr(hns3_pmu->pdev->bus),
+					   PCI_BUS_NUM(bdf),
+					   GET_PCI_DEVFN(bdf));
+	if (!pdev) {
+		pci_err(hns3_pmu->pdev, "Nonexistent EP device: %#x!\n", bdf);
+		return false;
+	}
+
+	pci_dev_put(pdev);
+	return true;
+}
+
+static void hns3_pmu_set_qid_para(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+				  u16 queue)
+{
+	u32 val;
+
+	val = GET_PCI_DEVFN(bdf);
+	val |= (u32)queue << HNS3_PMU_QID_PARA_QUEUE_S;
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_PARA, idx, val);
+}
+
+static bool hns3_pmu_qid_req_start(struct hns3_pmu *hns3_pmu, u32 idx)
+{
+	bool queue_id_valid = false;
+	u32 reg_qid_ctrl, val;
+	int err;
+
+	/* enable queue id request */
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx,
+			HNS3_PMU_QID_CTRL_REQ_ENABLE);
+
+	reg_qid_ctrl = hns3_pmu_get_offset(HNS3_PMU_REG_EVENT_QID_CTRL, idx);
+	err = readl_poll_timeout(hns3_pmu->base + reg_qid_ctrl, val,
+				 val & HNS3_PMU_QID_CTRL_DONE, 1, 1000);
+	if (err == -ETIMEDOUT) {
+		pci_err(hns3_pmu->pdev, "QID request timeout!\n");
+		goto out;
+	}
+
+	queue_id_valid = !(val & HNS3_PMU_QID_CTRL_MISS);
+
+out:
+	/* disable qid request and clear status */
+	hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_QID_CTRL, idx, 0);
+
+	return queue_id_valid;
+}
+
+static bool hns3_pmu_valid_queue(struct hns3_pmu *hns3_pmu, u32 idx, u16 bdf,
+				 u16 queue)
+{
+	hns3_pmu_set_qid_para(hns3_pmu, idx, bdf,
queue); + + return hns3_pmu_qid_req_start(hns3_pmu, idx); +} + +static struct hns3_pmu_event_attr *hns3_pmu_get_pmu_event(u32 event) +{ + struct hns3_pmu_event_attr *pmu_event; + struct dev_ext_attribute *eattr; + struct device_attribute *dattr; + struct attribute *attr; + u32 i; + + for (i = 0; i < ARRAY_SIZE(hns3_pmu_events_attr) - 1; i++) { + attr = hns3_pmu_events_attr[i]; + dattr = container_of(attr, struct device_attribute, attr); + eattr = container_of(dattr, struct dev_ext_attribute, attr); + pmu_event = eattr->var; + + if (event == pmu_event->event) + return pmu_event; + } + + return NULL; +} + +static int hns3_pmu_set_func_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC); + + return 0; +} + +static int hns3_pmu_set_func_queue_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u16 bdf = hns3_pmu_get_bdf(event); + + if (!hns3_pmu_valid_bdf(hns3_pmu, bdf)) + return -ENOENT; + + if (!hns3_pmu_valid_queue(hns3_pmu, hwc->idx, bdf, queue_id)) { + pci_err(hns3_pmu->pdev, "Invalid queue: %u\n", queue_id); + return -ENOENT; + } + + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_QUEUE); + + return 0; +} + +static bool +hns3_pmu_is_enabled_global_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 global = hns3_pmu_get_global(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_GLOBAL)) + return false; + + return global; +} + +static bool hns3_pmu_is_enabled_func_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC)) + return false; + else if (queue_id != HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool +hns3_pmu_is_enabled_func_queue_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 queue_id = hns3_pmu_get_queue(event); + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_QUEUE)) + return false; + else if (queue_id == HNS3_PMU_FILTER_ALL_QUEUE) + return false; + + return bdf; +} + +static bool hns3_pmu_is_enabled_port_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT)) + return false; + + return tc_id == HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_port_tc_mode(struct perf_event *event, + struct hns3_pmu_event_attr *pmu_event) +{ + u8 tc_id = hns3_pmu_get_tc(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_PORT_TC)) + return false; + + return tc_id != HNS3_PMU_FILTER_ALL_TC; +} + +static bool +hns3_pmu_is_enabled_func_intr_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu, + struct hns3_pmu_event_attr *pmu_event) +{ + u16 bdf = hns3_pmu_get_bdf(event); + + if (!(pmu_event->filter_support & HNS3_PMU_FILTER_SUPPORT_FUNC_INTR)) + return false; + + return hns3_pmu_valid_bdf(hns3_pmu, bdf); +} + +static int hns3_pmu_select_filter_mode(struct perf_event *event, + struct hns3_pmu *hns3_pmu) +{ + u32 event_id = hns3_pmu_get_event(event); + struct hw_perf_event *hwc = &event->hw; + struct 
hns3_pmu_event_attr *pmu_event; + + pmu_event = hns3_pmu_get_pmu_event(event_id); + if (!pmu_event) { + pci_err(hns3_pmu->pdev, "Invalid pmu event\n"); + return -ENOENT; + } + + if (hns3_pmu_is_enabled_global_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_GLOBAL); + return 0; + } + + if (hns3_pmu_is_enabled_func_mode(event, pmu_event)) + return hns3_pmu_set_func_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_func_queue_mode(event, pmu_event)) + return hns3_pmu_set_func_queue_mode(event, hns3_pmu); + + if (hns3_pmu_is_enabled_port_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT); + return 0; + } + + if (hns3_pmu_is_enabled_port_tc_mode(event, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_PORT_TC); + return 0; + } + + if (hns3_pmu_is_enabled_func_intr_mode(event, hns3_pmu, pmu_event)) { + HNS3_PMU_SET_HW_FILTER(hwc, HNS3_PMU_HW_FILTER_FUNC_INTR); + return 0; + } + + return -ENOENT; +} + +static bool hns3_pmu_validate_event_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct perf_event *event_group[HNS3_PMU_MAX_HW_EVENTS]; + int counters = 1; + int num; + + event_group[0] = leader; + if (!is_software_event(leader)) { + if (leader->pmu != event->pmu) + return false; + + if (leader != event && !hns3_pmu_cmp_event(leader, event)) + event_group[counters++] = event; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (is_software_event(sibling)) + continue; + + if (sibling->pmu != event->pmu) + return false; + + for (num = 0; num < counters; num++) { + if (hns3_pmu_cmp_event(event_group[num], sibling)) + break; + } + + if (num == counters) + event_group[counters++] = sibling; + } + + return counters <= HNS3_PMU_MAX_HW_EVENTS; +} + +static u32 hns3_pmu_get_filter_condition(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u16 intr_id = hns3_pmu_get_intr(event); + u8 port_id = hns3_pmu_get_port(event); + u16 bdf = hns3_pmu_get_bdf(event); + u8 tc_id = hns3_pmu_get_tc(event); + u8 filter_mode; + + filter_mode = *(u8 *)hwc->addr_filters; + switch (filter_mode) { + case HNS3_PMU_HW_FILTER_PORT: + return FILTER_CONDITION_PORT(port_id); + case HNS3_PMU_HW_FILTER_PORT_TC: + return FILTER_CONDITION_PORT_TC(port_id, tc_id); + case HNS3_PMU_HW_FILTER_FUNC: + case HNS3_PMU_HW_FILTER_FUNC_QUEUE: + return GET_PCI_DEVFN(bdf); + case HNS3_PMU_HW_FILTER_FUNC_INTR: + return FILTER_CONDITION_FUNC_INTR(GET_PCI_DEVFN(bdf), intr_id); + default: + break; + } + + return 0; +} + +static void hns3_pmu_config_filter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u8 event_type = hns3_pmu_get_event_type(event); + u8 subevent_id = hns3_pmu_get_subevent(event); + u16 queue_id = hns3_pmu_get_queue(event); + struct hw_perf_event *hwc = &event->hw; + u8 filter_mode = *(u8 *)hwc->addr_filters; + u16 bdf = hns3_pmu_get_bdf(event); + u32 idx = hwc->idx; + u32 val; + + val = event_type; + val |= subevent_id << HNS3_PMU_CTRL_SUBEVENT_S; + val |= filter_mode << HNS3_PMU_CTRL_FILTER_MODE_S; + val |= HNS3_PMU_EVENT_OVERFLOW_RESTART; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_get_filter_condition(event); + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_HIGH, idx, val); + + if (filter_mode == HNS3_PMU_HW_FILTER_FUNC_QUEUE) + hns3_pmu_set_qid_para(hns3_pmu, idx, bdf, queue_id); +} + +static void hns3_pmu_enable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx 
= hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_disable_counter(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_EN; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static void hns3_pmu_enable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val &= ~HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_disable_intr(struct hns3_pmu *hns3_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx); + val |= HNS3_PMU_INTR_MASK_OVERFLOW; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_INTR_MASK, idx, val); +} + +static void hns3_pmu_clear_intr_status(struct hns3_pmu *hns3_pmu, u32 idx) +{ + u32 val; + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val |= HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); + + val = hns3_pmu_readl(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx); + val &= ~HNS3_PMU_EVENT_STATUS_RESET; + hns3_pmu_writel(hns3_pmu, HNS3_PMU_REG_EVENT_CTRL_LOW, idx, val); +} + +static u64 hns3_pmu_read_counter(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + + return hns3_pmu_readq(hns3_pmu, event->hw.event_base, event->hw.idx); +} + +static void hns3_pmu_write_counter(struct perf_event *event, u64 value) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + u32 idx = event->hw.idx; + + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_COUNTER, idx, value); + hns3_pmu_writeq(hns3_pmu, HNS3_PMU_REG_EVENT_EXT_COUNTER, idx, value); +} + +static void hns3_pmu_init_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + local64_set(&hwc->prev_count, 0); + hns3_pmu_write_counter(event, 0); +} + +static int hns3_pmu_event_init(struct perf_event *event) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + int ret; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling is not supported */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EOPNOTSUPP; + + event->cpu = hns3_pmu->on_cpu; + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) { + pci_err(hns3_pmu->pdev, "Up to %u events are supported!\n", + HNS3_PMU_MAX_HW_EVENTS); + return -EBUSY; + } + + hwc->idx = idx; + + ret = hns3_pmu_select_filter_mode(event, hns3_pmu); + if (ret) { + pci_err(hns3_pmu->pdev, "Invalid filter, ret = %d.\n", ret); + return ret; + } + + if (!hns3_pmu_validate_event_group(event)) { + pci_err(hns3_pmu->pdev, "Invalid event group.\n"); + return -EINVAL; + } + + if (hns3_pmu_get_ext_counter_used(event)) + hwc->event_base = HNS3_PMU_REG_EVENT_EXT_COUNTER; + else + hwc->event_base = HNS3_PMU_REG_EVENT_COUNTER; + + return 0; +} + +static void hns3_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 new_cnt, prev_cnt, delta; + + do { + prev_cnt = local64_read(&hwc->prev_count); + new_cnt = hns3_pmu_read_counter(event); + } while 
(local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != + prev_cnt); + + delta = new_cnt - prev_cnt; + local64_add(delta, &event->count); +} + +static void hns3_pmu_start(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + hns3_pmu_config_filter(event); + hns3_pmu_init_counter(event); + hns3_pmu_enable_intr(hns3_pmu, hwc); + hns3_pmu_enable_counter(hns3_pmu, hwc); + + perf_event_update_userpage(event); +} + +static void hns3_pmu_stop(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_disable_counter(hns3_pmu, hwc); + hns3_pmu_disable_intr(hns3_pmu, hwc); + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + /* Read hardware counter and update the perf counter statistics */ + hns3_pmu_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +static int hns3_pmu_add(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + /* Check all working events to find a related event. */ + idx = hns3_pmu_find_related_event_idx(hns3_pmu, event); + if (idx < 0 && idx != -ENOENT) + return idx; + + /* Current event shares an enabled hardware event with related event */ + if (idx >= 0 && idx < HNS3_PMU_MAX_HW_EVENTS) { + hwc->idx = idx; + goto start_count; + } + + idx = hns3_pmu_get_event_idx(hns3_pmu); + if (idx < 0) + return idx; + + hwc->idx = idx; + hns3_pmu->hw_events[idx] = event; + +start_count: + if (flags & PERF_EF_START) + hns3_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void hns3_pmu_del(struct perf_event *event, int flags) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + hns3_pmu_stop(event, PERF_EF_UPDATE); + hns3_pmu->hw_events[hwc->idx] = NULL; + perf_event_update_userpage(event); +} + +static void hns3_pmu_enable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val |= HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static void hns3_pmu_disable(struct pmu *pmu) +{ + struct hns3_pmu *hns3_pmu = to_hns3_pmu(pmu); + u32 val; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); + val &= ~HNS3_PMU_GLOBAL_START; + writel(val, hns3_pmu->base + HNS3_PMU_REG_GLOBAL_CTRL); +} + +static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + u16 device_id; + char *name; + u32 val; + + hns3_pmu->base = pcim_iomap_table(pdev)[BAR_2]; + if (!hns3_pmu->base) { + pci_err(pdev, "ioremap failed\n"); + return -ENOMEM; + } + + hns3_pmu->hw_clk_freq = readl(hns3_pmu->base + HNS3_PMU_REG_CLOCK_FREQ); + + val = readl(hns3_pmu->base + HNS3_PMU_REG_BDF); + hns3_pmu->bdf_min = val & 0xffff; + hns3_pmu->bdf_max = val >> 16; + + val = readl(hns3_pmu->base + HNS3_PMU_REG_DEVICE_ID); + device_id = val & 0xffff; + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hns3_pmu_sicl_%u", device_id); + if (!name) + return -ENOMEM; + + hns3_pmu->pdev = pdev; + hns3_pmu->on_cpu = -1; + hns3_pmu->identifier = readl(hns3_pmu->base + HNS3_PMU_REG_VERSION); + 
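/*
+ * The HNS3 PMU counts device-wide events only: task_ctx_nr below is
+ * perf_invalid_context, so events cannot be bound to a task, and
+ * PERF_PMU_CAP_NO_EXCLUDE makes the perf core reject privilege-level
+ * exclusion flags that the hardware counters cannot honour.
+ */
+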
hns3_pmu->pmu = (struct pmu) { + .name = name, + .module = THIS_MODULE, + .event_init = hns3_pmu_event_init, + .pmu_enable = hns3_pmu_enable, + .pmu_disable = hns3_pmu_disable, + .add = hns3_pmu_add, + .del = hns3_pmu_del, + .start = hns3_pmu_start, + .stop = hns3_pmu_stop, + .read = hns3_pmu_read, + .task_ctx_nr = perf_invalid_context, + .attr_groups = hns3_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + return 0; +} + +static irqreturn_t hns3_pmu_irq(int irq, void *data) +{ + struct hns3_pmu *hns3_pmu = data; + u32 intr_status, idx; + + for (idx = 0; idx < HNS3_PMU_MAX_HW_EVENTS; idx++) { + intr_status = hns3_pmu_readl(hns3_pmu, + HNS3_PMU_REG_EVENT_INTR_STATUS, + idx); + + /* + * As each counter will restart from 0 when it is overflowed, + * extra processing is no need, just clear interrupt status. + */ + if (intr_status) + hns3_pmu_clear_intr_status(hns3_pmu, idx); + } + + return IRQ_HANDLED; +} + +static int hns3_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + if (hns3_pmu->on_cpu == -1) { + hns3_pmu->on_cpu = cpu; + irq_set_affinity(hns3_pmu->irq, cpumask_of(cpu)); + } + + return 0; +} + +static int hns3_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hns3_pmu *hns3_pmu; + unsigned int target; + + hns3_pmu = hlist_entry_safe(node, struct hns3_pmu, node); + if (!hns3_pmu) + return -ENODEV; + + /* Nothing to do if this CPU doesn't own the PMU */ + if (hns3_pmu->on_cpu != cpu) + return 0; + + /* Choose a new CPU from all online cpus */ + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&hns3_pmu->pmu, cpu, target); + hns3_pmu->on_cpu = target; + irq_set_affinity(hns3_pmu->irq, cpumask_of(target)); + + return 0; +} + +static void hns3_pmu_free_irq(void *data) +{ + struct pci_dev *pdev = data; + + pci_free_irq_vectors(pdev); +} + +static int hns3_pmu_irq_register(struct pci_dev *pdev, + struct hns3_pmu *hns3_pmu) +{ + int irq, ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI); + if (ret < 0) { + pci_err(pdev, "failed to enable MSI vectors, ret = %d.\n", ret); + return ret; + } + + ret = devm_add_action(&pdev->dev, hns3_pmu_free_irq, pdev); + if (ret) { + pci_err(pdev, "failed to add free irq action, ret = %d.\n", ret); + return ret; + } + + irq = pci_irq_vector(pdev, 0); + ret = devm_request_irq(&pdev->dev, irq, hns3_pmu_irq, 0, + hns3_pmu->pmu.name, hns3_pmu); + if (ret) { + pci_err(pdev, "failed to register irq, ret = %d.\n", ret); + return ret; + } + + hns3_pmu->irq = irq; + + return 0; +} + +static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) +{ + int ret; + + ret = hns3_pmu_alloc_pmu(pdev, hns3_pmu); + if (ret) + return ret; + + ret = hns3_pmu_irq_register(pdev, hns3_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(hns3_pmu_online, &hns3_pmu->node); + if (ret) { + pci_err(pdev, "failed to register hotplug, ret = %d.\n", ret); + return ret; + } + + ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1); + if (ret) { + pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret); + cpuhp_state_remove_instance(hns3_pmu_online, &hns3_pmu->node); + } + + return ret; +} + +static void hns3_pmu_uninit_pmu(struct pci_dev *pdev) +{ + struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev); + + perf_pmu_unregister(&hns3_pmu->pmu); + cpuhp_state_remove_instance(hns3_pmu_online, 
&hns3_pmu->node); +} + +static int hns3_pmu_init_dev(struct pci_dev *pdev) +{ + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + pci_err(pdev, "failed to enable pci device, ret = %d.\n", ret); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(BAR_2), "hns3_pmu"); + if (ret < 0) { + pci_err(pdev, "failed to request pci region, ret = %d.\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int hns3_pmu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct hns3_pmu *hns3_pmu; + int ret; + + hns3_pmu = devm_kzalloc(&pdev->dev, sizeof(*hns3_pmu), GFP_KERNEL); + if (!hns3_pmu) + return -ENOMEM; + + ret = hns3_pmu_init_dev(pdev); + if (ret) + return ret; + + ret = hns3_pmu_init_pmu(pdev, hns3_pmu); + if (ret) { + pci_clear_master(pdev); + return ret; + } + + pci_set_drvdata(pdev, hns3_pmu); + + return ret; +} + +static void hns3_pmu_remove(struct pci_dev *pdev) +{ + hns3_pmu_uninit_pmu(pdev); + pci_clear_master(pdev); + pci_set_drvdata(pdev, NULL); +} + +static const struct pci_device_id hns3_pmu_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa22b) }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, hns3_pmu_ids); + +static struct pci_driver hns3_pmu_driver = { + .name = "hns3_pmu", + .id_table = hns3_pmu_ids, + .probe = hns3_pmu_probe, + .remove = hns3_pmu_remove, +}; + +static int __init hns3_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/hns3_pmu/pmcg:online", + hns3_pmu_online_cpu, + hns3_pmu_offline_cpu); + if (ret < 0) { + pr_err("failed to setup HNS3 PMU hotplug, ret = %d.\n", ret); + return ret; + } + hns3_pmu_online = ret; + + ret = pci_register_driver(&hns3_pmu_driver); + if (ret) { + pr_err("failed to register pci driver, ret = %d.\n", ret); + cpuhp_remove_multi_state(hns3_pmu_online); + } + + return ret; +} +module_init(hns3_pmu_module_init); + +static void __exit hns3_pmu_module_exit(void) +{ + pci_unregister_driver(&hns3_pmu_driver); + cpuhp_remove_multi_state(hns3_pmu_online); +} +module_exit(hns3_pmu_module_exit); + +MODULE_DESCRIPTION("HNS3 PMU driver"); +MODULE_LICENSE("GPL v2"); -- Gitee From 063644a29212946cefa4549e5670b68beed00064 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Nov 2022 22:03:12 +0800 Subject: [PATCH 027/173] docs: perf: Include hns3-pmu.rst in toctree to fix 'htmldocs' WARNING [Upstream commit aaaee7b55c9e58410abcef6e3e2fd80e4135d02e] After commit 39915b6b5fc2 ("drivers/perf: hisi: Add description for HNS3 PMU driver"),building the 'htmldocs' target results in the following warning: | Documentation/admin-guide/perf/hns3-pmu.rst: WARNING: document isn't included in any toctree Add 'hns3-pmu' to the perf toctree to silence the warning. 
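For context, the resulting toctree stanza reads roughly as below (a sketch reassembled from the hunk that follows; the ".. toctree::" directive header is standard Sphinx and is assumed, and later entries are elided):

	.. toctree::
	   :maxdepth: 1

	   hisi-pmu
	   hns3-pmu
	   hisi-pcie-pmu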
Reported-by: Stephen Rothwell Signed-off-by: Will Deacon Signed-off-by: Jiantao Xiao Signed-off-by: huwentao --- Documentation/admin-guide/perf/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index dfab80df50ad..a62690a1d493 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -8,6 +8,7 @@ Performance monitor support :maxdepth: 1 hisi-pmu + hns3-pmu hisi-pcie-pmu qcom_l2_pmu qcom_l3_pmu -- Gitee From 197c79f374028546f5efa8161dbb16868a47d9c7 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 21 Nov 2022 22:03:05 +0800 Subject: [PATCH 028/173] drivers/perf: fix the kabi value change caused by new hotplug states Fix the kabi value change caused by the HiSilicon PMU drivers adding new entries to "enum cpuhp_state{}". Switch the hisi_pcie_pmu and hisi_cpa_pmu drivers from explicitly specified hotplug states to dynamically allocated ones (CPUHP_AP_ONLINE_DYN). The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are reserved for dynamic allocation. Signed-off-by: Junhao He Reviewed-by: Yicong Yang Reviewed-by: Yang Jihong Reviewed-by: Xiongfeng Wang Signed-off-by: Zheng Zengkai Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 22 ++++++++++--------- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 23 ++++++++++---------- include/linux/cpuhotplug.h | 6 ----- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 6553e176f381..6f2a9a110e2c 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -19,6 +19,9 @@ #include #include +/* Dynamic CPU hotplug state used by PCIe PMU */ +static enum cpuhp_state hisi_pcie_pmu_online; + #define DRV_NAME "hisi_pcie_pmu" /* Define registers */ #define HISI_PCIE_GLOBAL_CTRL 0x00 @@ -879,7 +882,7 @@ static int hisi_pcie_init_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_p if (ret) goto err_iounmap; - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node); + ret = cpuhp_state_add_instance(hisi_pcie_pmu_online, &pcie_pmu->node); if (ret) { pci_err(pdev, "Failed to register hotplug: %d\n", ret); goto err_irq_unregister; @@ -894,8 +897,7 @@ return ret; err_hotplug_unregister: - cpuhp_state_remove_instance_nocalls( - CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node); + cpuhp_state_remove_instance_nocalls(hisi_pcie_pmu_online, &pcie_pmu->node); err_irq_unregister: hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu); @@ -911,8 +913,7 @@ static void hisi_pcie_uninit_pmu(struct pci_dev *pdev) { struct hisi_pcie_pmu *pcie_pmu = pci_get_drvdata(pdev); perf_pmu_unregister(&pcie_pmu->pmu); - cpuhp_state_remove_instance_nocalls( - CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, &pcie_pmu->node); + cpuhp_state_remove_instance_nocalls(hisi_pcie_pmu_online, &pcie_pmu->node); hisi_pcie_pmu_irq_unregister(pdev, pcie_pmu); iounmap(pcie_pmu->base); } @@ -983,18 +984,19 @@ static int __init hisi_pcie_module_init(void) { int ret; - ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, - "AP_PERF_ARM_HISI_PCIE_PMU_ONLINE", + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/hisi/pcie:online", hisi_pcie_pmu_online_cpu, hisi_pcie_pmu_offline_cpu); - if (ret) { + if (ret < 0) { pr_err("Failed to setup PCIe PMU hotplug:
%d\n", ret); return ret; } + hisi_pcie_pmu_online = ret; ret = pci_register_driver(&hisi_pcie_pmu_driver); if (ret) - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE); + cpuhp_remove_multi_state(hisi_pcie_pmu_online); return ret; } @@ -1003,7 +1005,7 @@ module_init(hisi_pcie_module_init); static void __exit hisi_pcie_module_exit(void) { pci_unregister_driver(&hisi_pcie_pmu_driver); - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE); + cpuhp_remove_multi_state(hisi_pcie_pmu_online); } module_exit(hisi_pcie_module_exit); diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index a9bb73f76be4..09839dae9b7c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -19,6 +19,9 @@ #include "hisi_uncore_pmu.h" +/* Dynamic CPU hotplug state used by CPA PMU */ +static enum cpuhp_state hisi_cpa_pmu_online; + /* CPA register definition */ #define CPA_PERF_CTRL 0x1c00 #define CPA_EVENT_CTRL 0x1c04 @@ -334,8 +337,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) /* Power Management should be disabled before using CPA PMU. */ hisi_cpa_pmu_disable_pm(cpa_pmu); - ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, - &cpa_pmu->node); + ret = cpuhp_state_add_instance(hisi_cpa_pmu_online, &cpa_pmu->node); if (ret) { dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); hisi_cpa_pmu_enable_pm(cpa_pmu); @@ -345,8 +347,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&cpa_pmu->pmu, name, -1); if (ret) { dev_err(cpa_pmu->dev, "PMU register failed\n"); - cpuhp_state_remove_instance_nocalls( - CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, &cpa_pmu->node); + cpuhp_state_remove_instance_nocalls(hisi_cpa_pmu_online, &cpa_pmu->node); hisi_cpa_pmu_enable_pm(cpa_pmu); return ret; } @@ -360,8 +361,7 @@ static int hisi_cpa_pmu_remove(struct platform_device *pdev) struct hisi_pmu *cpa_pmu = platform_get_drvdata(pdev); perf_pmu_unregister(&cpa_pmu->pmu); - cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, - &cpa_pmu->node); + cpuhp_state_remove_instance_nocalls(hisi_cpa_pmu_online, &cpa_pmu->node); hisi_cpa_pmu_enable_pm(cpa_pmu); return 0; } @@ -380,18 +380,19 @@ static int __init hisi_cpa_pmu_module_init(void) { int ret; - ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, - "AP_PERF_ARM_HISI_CPA_ONLINE", + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "pmu/hisi/cpa:online", hisi_uncore_pmu_online_cpu, hisi_uncore_pmu_offline_cpu); - if (ret) { + if (ret < 0) { pr_err("setup hotplug failed: %d\n", ret); return ret; } + hisi_cpa_pmu_online = ret; ret = platform_driver_register(&hisi_cpa_pmu_driver); if (ret) - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE); + cpuhp_remove_multi_state(hisi_cpa_pmu_online); return ret; } @@ -400,7 +401,7 @@ module_init(hisi_cpa_pmu_module_init); static void __exit hisi_cpa_pmu_module_exit(void) { platform_driver_unregister(&hisi_cpa_pmu_driver); - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE); + cpuhp_remove_multi_state(hisi_cpa_pmu_online); } module_exit(hisi_cpa_pmu_module_exit); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a7024884f5b0..2cdbd2f4f4e4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -164,17 +164,11 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, - #ifndef __GENKSYMS__ - 
CPUHP_AP_PERF_ARM_HISI_CPA_ONLINE, - #endif CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, - #ifndef __GENKSYMS__ - CPUHP_AP_PERF_ARM_HISI_PCIE_PMU_ONLINE, - #endif CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- Gitee From 9165334aa3b22467c67eed915a7280a3d98e6269 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Sat, 4 Mar 2023 07:24:36 +0000 Subject: [PATCH 029/173] perf: hisi: Add configs for PMU isolation Add CONFIG_HISI_L3T_PMU and CONFIG_HISI_LPDDRC_PMU to isolate features of the HiSilicon PMU driver. This patch isolates commits 0edc58409e30 and 6bf896bea639. Signed-off-by: Zhang Zekun Signed-off-by: huwentao --- arch/arm64/configs/tencent.config | 2 ++ drivers/perf/hisilicon/Kconfig | 19 +++++++++++++++++++ drivers/perf/hisilicon/Makefile | 6 +++--- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index 81d20e78cd60..63cebd9a7a46 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1505,3 +1505,5 @@ CONFIG_HISI_SOC_HHA=m CONFIG_ARM64_NMI=y CONFIG_ARM64_HAFT=y CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y +CONFIG_HISI_L3T_PMU=m +CONFIG_HISI_LPDDRC_PMU=m diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig index 171bfc1b6bc2..e19feba8b5d3 100644 --- a/drivers/perf/hisilicon/Kconfig +++ b/drivers/perf/hisilicon/Kconfig @@ -24,3 +24,22 @@ config HNS3_PMU devices. Adds the HNS3 PMU into perf events system for monitoring latency, bandwidth etc. + +config HISI_L3T_PMU + tristate "HiSilicon SoC L3T PMU drivers" + depends on HISI_PMU + default n + help + Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home + Agent performance monitor and DDR Controller performance monitor. + L3T is a specialized PMU driver. + +config HISI_LPDDRC_PMU + tristate "HiSilicon SoC LPDDRC PMU drivers" + depends on HISI_PMU + default n + help + Support for HiSilicon SoC L3 Cache performance monitor, Hydra Home + Agent performance monitor and DDR Controller performance monitor. + LPDDRC is a specialized PMU driver. + diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index a35705795dfc..fc7ed8c6cc0f 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o \ - hisi_uncore_l3t_pmu.o \ - hisi_uncore_lpddrc_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o +obj-$(CONFIG_HISI_L3T_PMU) += hisi_uncore_l3t_pmu.o +obj-$(CONFIG_HISI_LPDDRC_PMU) += hisi_uncore_lpddrc_pmu.o -- Gitee From 8daad1a3f53aa7d7a42dccacfdad0abc5f6bd2ac Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 25 Aug 2023 11:34:33 +0800 Subject: [PATCH 030/173] drivers/perf: hisi: Add support for HiSilicon H60PA and PAv3 PMU driver [Upstream commit 1a51688474c0d395b864e98236335fba712e29bf] Compared to the original PA device, H60PA offers higher bandwidth. The H60PA is a new device and we use the HID to differentiate them. The events supported by PAv3 and PAv2 are different. The PAv3 PMU removed some events which are supported by the PAv2 PMU.
The older PA PMU driver will probe v3 as v2, so the PA events displayed by "perf list" cannot work properly. Add the HISI0275 HID for the PAv3 PMU to distinguish the two versions. Except for the overflow interrupt registers, each H60PA PMU behaves the same as the original PA PMU module. It has 8 programmable counters and each counter is free-running. Interrupts are supported to handle counter (64-bit) overflow. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Reviewed-by: Yicong Yang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230615125926.29832-2-hejunhao3@huawei.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 127 +++++++++++++++++--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 8 ++ 2 files changed, 120 insertions(+), 15 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 8bea7ac4f5ad..cec593621fa9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -22,9 +22,15 @@ #define PA_TT_CTRL 0x1c08 #define PA_TGTID_CTRL 0x1c14 #define PA_SRCID_CTRL 0x1c18 + +/* H32 PA interrupt registers */ #define PA_INT_MASK 0x1c70 #define PA_INT_STATUS 0x1c78 #define PA_INT_CLEAR 0x1c7c + +#define H60PA_INT_STATUS 0x1c70 +#define H60PA_INT_MASK 0x1c74 + #define PA_EVENT_TYPE0 0x1c80 #define PA_PMU_VERSION 0x1cf0 #define PA_EVENT_CNT0_L 0x1d00 @@ -46,6 +52,12 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_pa_pmu_int_regs { + u32 mask_offset; + u32 clear_offset; + u32 status_offset; +}; + static void hisi_pa_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); @@ -219,40 +231,40 @@ static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu, static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 0 to enable interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val &= ~(1 << hwc->idx); - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu, struct hw_perf_event *hwc) { + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; u32 val; /* Write 1 to mask interrupt */ - val = readl(pa_pmu->base + PA_INT_MASK); + val = readl(pa_pmu->base + regs->mask_offset); val |= 1 << hwc->idx; - writel(val, pa_pmu->base + PA_INT_MASK); + writel(val, pa_pmu->base + regs->mask_offset); } static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu) { - return readl(pa_pmu->base + PA_INT_STATUS); + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; + + return readl(pa_pmu->base + regs->status_offset); } static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) { - writel(1 << idx, pa_pmu->base + PA_INT_CLEAR); -} + struct hisi_pa_pmu_int_regs *regs = pa_pmu->dev_info->private; -static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { - { "HISI0273", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + writel(1 << idx, pa_pmu->base + regs->clear_offset); +} static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) @@ -276,6 +288,10 @@ static int
hisi_pa_pmu_init_data(struct platform_device *pdev, pa_pmu->ccl_id = -1; pa_pmu->sccl_id = -1; + pa_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!pa_pmu->dev_info) + return -ENODEV; + pa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pa_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n"); @@ -314,6 +330,32 @@ static const struct attribute_group hisi_pa_pmu_v2_events_group = { .attrs = hisi_pa_pmu_v2_events_attr, }; +static struct attribute *hisi_pa_pmu_v3_events_attr[] = { + HISI_PMU_EVENT_ATTR(tx_req, 0x0), + HISI_PMU_EVENT_ATTR(tx_dat, 0x1), + HISI_PMU_EVENT_ATTR(tx_snp, 0x2), + HISI_PMU_EVENT_ATTR(rx_req, 0x7), + HISI_PMU_EVENT_ATTR(rx_dat, 0x8), + HISI_PMU_EVENT_ATTR(rx_snp, 0x9), + NULL +}; + +static const struct attribute_group hisi_pa_pmu_v3_events_group = { + .name = "events", + .attrs = hisi_pa_pmu_v3_events_attr, +}; + +static struct attribute *hisi_h60pa_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_flit, 0x50), + HISI_PMU_EVENT_ATTR(tx_flit, 0x65), + NULL +}; + +static const struct attribute_group hisi_h60pa_pmu_events_group = { + .name = "events", + .attrs = hisi_h60pa_pmu_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { @@ -337,6 +379,12 @@ static struct attribute_group hisi_pa_pmu_identifier_group = { .attrs = hisi_pa_pmu_identifier_attrs, }; +static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { + .mask_offset = PA_INT_MASK, + .clear_offset = PA_INT_CLEAR, + .status_offset = PA_INT_STATUS, +}; + static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, @@ -345,6 +393,46 @@ static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v2_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_pa_pmu_v3_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h32pa_v3 = { + .name = "pa", + .attr_groups = hisi_pa_pmu_v3_attr_groups, + .private = &hisi_pa_pmu_regs, +}; + +static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { + .mask_offset = H60PA_INT_MASK, + .clear_offset = H60PA_INT_STATUS, /* Clear on write */ + .status_offset = H60PA_INT_STATUS, +}; + +static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_h60pa_pmu_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_pmu_dev_info hisi_h60pa = { + .name = "h60pa", + .attr_groups = hisi_h60pa_pmu_attr_groups, + .private = &hisi_h60pa_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_pa_ops = { .write_evtype = hisi_pa_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -375,7 +463,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups; + pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; pa_pmu->check_event = 0xB0; @@ -400,8 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, 
"hisi_sicl%u_pa%u", - pa_pmu->sicl_id, pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", + pa_pmu->sicl_id, pa_pmu->dev_info->name, + pa_pmu->index_id); if (!name) return -ENOMEM; @@ -452,6 +541,14 @@ static int hisi_pa_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = { + { "HISI0273", (kernel_ulong_t)&hisi_h32pa_v2 }, + { "HISI0275", (kernel_ulong_t)&hisi_h32pa_v3 }, + { "HISI0274", (kernel_ulong_t)&hisi_h60pa }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match); + static struct platform_driver hisi_pa_pmu_driver = { .driver = { .name = "hisi_pa_pmu", diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 0a983efe9db5..e97a8f1fd46f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -61,6 +61,13 @@ struct hisi_uncore_ops { void (*disable_filter)(struct perf_event *event); }; +/* Describes the HISI PMU chip features information */ +struct hisi_pmu_dev_info { + const char *name; + const struct attribute_group **attr_groups; + void *private; +}; + struct hisi_pmu_hwevents { struct perf_event *hw_events[HISI_MAX_COUNTERS]; DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS); @@ -71,6 +78,7 @@ struct hisi_pmu_hwevents { struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; + const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; /* associated_cpus: All CPUs associated with the PMU */ cpumask_t associated_cpus; -- Gitee From 0697afee0b7f6b9591aa8b515aa2b80ae1b1701a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Feb 2020 19:35:31 +0000 Subject: [PATCH 031/173] arm64: perf: Refactor PMU init callbacks [Upstream commit e424b17985262359181cce1553a1ea2cdf263941] The PMU init callbacks are already drowning in boilerplate, so before doubling the number of supported PMU models, give it a sensible refactor to significantly reduce the bloat, both in source and object code. Although nobody uses non-default sysfs attributes today, there's minimal impact to preserving the notion that maybe, some day, somebody might, so we may as well keep up appearances. Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 124 +++++++-------------------------- 1 file changed, 27 insertions(+), 97 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index c808e346c14b..20deae014783 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1131,7 +1131,10 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) return 0; } -static int armv8_pmu_init(struct arm_pmu *cpu_pmu) +static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, + int (*map_event)(struct perf_event *event), + const struct attribute_group *events, + const struct attribute_group *format) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1152,135 +1155,62 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->sched_task = armv8pmu_sched_task; cpu_pmu->branch_reset = armv8pmu_branch_reset; + cpu_pmu->name = name; + cpu_pmu->map_event = map_event; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? + events : &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? 
+ format : &armv8_pmuv3_format_attr_group; + return 0; } static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_pmuv3"; - cpu_pmu->map_event = armv8_pmuv3_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_pmuv3", + armv8_pmuv3_map_event, NULL, NULL); } static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a35"; - cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", + armv8_a53_map_event, NULL, NULL); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a53"; - cpu_pmu->map_event = armv8_a53_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", + armv8_a53_map_event, NULL, NULL); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a57"; - cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", + armv8_a57_map_event, NULL, NULL); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a72"; - cpu_pmu->map_event = armv8_a57_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", + armv8_a57_map_event, NULL, NULL); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cortex_a73"; - cpu_pmu->map_event = armv8_a73_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", + armv8_a73_map_event, NULL, NULL); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - cpu_pmu->name = "armv8_cavium_thunder"; - cpu_pmu->map_event = armv8_thunder_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", + armv8_thunder_map_event, NULL, NULL); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - int ret = armv8_pmu_init(cpu_pmu); - if (ret) - return ret; - - 
cpu_pmu->name = "armv8_brcm_vulcan"; - cpu_pmu->map_event = armv8_vulcan_map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = - &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = - &armv8_pmuv3_format_attr_group; - - return 0; + return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", + armv8_vulcan_map_event, NULL, NULL); } static const struct of_device_id armv8_pmu_of_device_ids[] = { -- Gitee From 8326a89285afaba4dc0cffdd5fa3348e9090bcdf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Feb 2020 19:35:32 +0000 Subject: [PATCH 032/173] arm64: perf: Support new DT compatibles [Upstream commit 29cc4ceeac1274ab8363a11b81ebd99f3b023985] Add support for matching the new PMUs. For now, this just wires them up as generic PMUv3 such that people writing DTs for new SoCs can do the right thing, and at least have architectural and raw events be usable. We can come back and fill in event maps for sysfs and/or perf tools at a later date. Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 20deae014783..079a1731099b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1171,6 +1171,12 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) armv8_pmuv3_map_event, NULL, NULL); } +static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34", + armv8_pmuv3_map_event, NULL, NULL); +} + static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", @@ -1183,12 +1189,24 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) armv8_a53_map_event, NULL, NULL); } +static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55", + armv8_pmuv3_map_event, NULL, NULL); +} + static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event, NULL, NULL); } +static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65", + armv8_pmuv3_map_event, NULL, NULL); +} + static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", @@ -1201,6 +1219,36 @@ static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) armv8_a73_map_event, NULL, NULL); } +static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75", + armv8_pmuv3_map_event, NULL, NULL); +} + +static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76", + armv8_pmuv3_map_event, NULL, NULL); +} + +static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77", + armv8_pmuv3_map_event, NULL, NULL); +} + +static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1", + armv8_pmuv3_map_event, NULL, NULL); +} + +static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1", + armv8_pmuv3_map_event, NULL, NULL); +} + static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", @@ -1215,11 +1263,19 @@ static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) static const struct 
of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a34-pmu", .data = armv8_a34_pmu_init}, {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, + {.compatible = "arm,cortex-a55-pmu", .data = armv8_a55_pmu_init}, {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, + {.compatible = "arm,cortex-a65-pmu", .data = armv8_a65_pmu_init}, {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init}, {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init}, + {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init}, + {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init}, + {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init}, + {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init}, + {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init}, {}, -- Gitee From f56ae9bcc3b41c40235935d2bf6d0651218ab0dc Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 2 Mar 2020 18:17:52 +0000 Subject: [PATCH 033/173] arm64: perf: Add support for ARMv8.5-PMU 64-bit counters [Upstream commit 8673e02e58410e6c4cefa499efa846286e45a991] At present ARMv8 event counters are limited to 32-bits, though by using the CHAIN event it's possible to combine adjacent counters to achieve 64-bits. The perf config1:0 bit can be set to use such a configuration. With the introduction of ARMv8.5-PMU support, all event counters can now be used as 64-bit counters. Let's enable 64-bit event counters where support exists. Unless the user sets config1:0 we will adjust the counter value such that it overflows upon 32-bit overflow. This follows the same behaviour as the cycle counter which has always been (and remains) 64-bits. 
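To see the arithmetic of the bias in isolation, here is a standalone userspace sketch (not part of the patch; the constant mirrors GENMASK(63, 32) and the start value is arbitrary):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t bias = 0xffffffff00000000ULL;	/* GENMASK(63, 32) */
		uint64_t start = 100;			/* programmed 32-bit value */
		uint64_t counter = bias | start;	/* biased 64-bit counter */

		/* Increments until the 64-bit counter wraps to zero ... */
		uint64_t to_overflow = -counter;	/* i.e. 2^64 - counter */

		/* ... equal the distance to the 32-bit overflow point. */
		printf("%llu == %llu\n", (unsigned long long)to_overflow,
		       (unsigned long long)((1ULL << 32) - start));
		return 0;
	}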
Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose [Mark: fix ID field names, compare with 8.5 value] Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/include/asm/perf_event.h | 3 +- arch/arm64/include/asm/sysreg.h | 7 +++ arch/arm64/kernel/perf_event.c | 87 +++++++++++++++++++++++------ include/linux/perf/arm_pmu.h | 1 + 4 files changed, 81 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index a9932ffaaa36..c121eaf67d37 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -176,9 +176,10 @@ #define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ +#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ #define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0x7f /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ /* * PMOVSR: counters overflow flag status reg diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index af35eab7b6e2..7683351db8a9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1250,6 +1250,13 @@ #define ID_AA64DFR0_EL1_BRBE_NI 0x0 #define ID_AA64DFR0_EL1_BRBE_IMP 0x1 #define ID_AA64DFR0_EL1_BRBE_BRBE_V1P1 0x2 + +#define ID_AA64DFR0_PMUVER_8_0 0x1 +#define ID_AA64DFR0_PMUVER_8_1 0x4 +#define ID_AA64DFR0_PMUVER_8_4 0x5 +#define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf + #define ID_AA64DFR0_PMSVER_8_2 0x1 #define ID_AA64DFR0_PMSVER_8_3 0x2 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 079a1731099b..f0f90d446021 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -346,6 +346,17 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 + +/* + * We unconditionally enable ARMv8.5-PMU long event counter support + * (64-bit events) where supported. Indicate if this arm_pmu has long + * event counter support. + */ +static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu) +{ + return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5); +} + /* * We must chain two programmable counters for 64 bit events, * except when we have allocated the 64bit cycle counter (for CPU @@ -355,9 +366,11 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { static inline bool armv8pmu_event_is_chained(struct perf_event *event) { int idx = event->hw.idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); return !WARN_ON(idx < 0) && armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu) && (idx != ARMV8_IDX_CYCLE_COUNTER); } @@ -478,21 +491,60 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) return val; } +/* + * The cycle counter is always a 64-bit counter. When ARMV8_PMU_PMCR_LP + * is set the event counters also become 64-bit counters. Unless the + * user has requested a long counter (attr.config1) then we want to + * interrupt upon 32-bit overflow - we achieve this by applying a bias. 
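+ * The bias is applied in armv8pmu_write_counter() and stripped again in
+ * armv8pmu_read_counter(); long-counter mode itself is enabled with
+ * ARMV8_PMU_PMCR_LP in armv8pmu_reset().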
+ */ +static bool armv8pmu_event_needs_bias(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_64bit(event)) + return false; + + if (armv8pmu_has_long_event(cpu_pmu) || + idx == ARMV8_IDX_CYCLE_COUNTER) + return true; + + return false; +} + +static u64 armv8pmu_bias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value |= GENMASK(63, 32); + + return value; +} + +static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value) +{ + if (armv8pmu_event_needs_bias(event)) + value &= ~GENMASK(63, 32); + + return value; +} + static u64 armv8pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; u64 value = 0; - if (idx == ARMV8_IDX_CYCLE_COUNTER) + if (idx == ARMV8_IDX_CYCLE_COUNTER) { value = read_sysreg(pmccntr_el0); - else + } else { value = armv8pmu_read_hw_counter(event); + } - return value; + return armv8pmu_unbias_long_counter(event, value); } -static inline void armv8pmu_write_evcntr(int idx, u32 value) +static inline void armv8pmu_write_evcntr(int idx, u64 value) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -516,19 +568,14 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + + value = armv8pmu_bias_long_counter(event, value); if (idx == ARMV8_IDX_CYCLE_COUNTER) { - /* - * The cycles counter is really a 64-bit counter. - * When treating it as a 32-bit counter, we only count - * the lower 32 bits, and set the upper 32-bits so that - * we get an interrupt upon 32-bit overflow. - */ - if (!armv8pmu_event_is_64bit(event)) - value |= 0xffffffff00000000ULL; write_sysreg(value, pmccntr_el0); - } else + } else { armv8pmu_write_hw_counter(event, value); + } } static inline void armv8pmu_write_evtype(int idx, u32 val) @@ -850,7 +897,8 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - if (armv8pmu_event_is_64bit(event)) + if (armv8pmu_event_is_64bit(event) && + !armv8pmu_has_long_event(cpu_pmu)) return armv8pmu_get_chain_idx(cpuc, cpu_pmu); else return armv8pmu_get_single_idx(cpuc, cpu_pmu); @@ -941,6 +989,7 @@ static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; u32 idx, nb_cnt = cpu_pmu->num_events; + u32 pmcr; /* The counter and interrupt enable registers are unknown at reset. */ for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { @@ -955,8 +1004,13 @@ static void armv8pmu_reset(void *info) * Initialize & Reset PMNC. Request overflow interrupt for * 64 bit cycle counter but cheat in armv8pmu_write_counter(). 
*/ - armv8pmu_pmcr_write(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | - ARMV8_PMU_PMCR_LC); + pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC; + + /* Enable long event counter support where available */ + if (armv8pmu_has_long_event(cpu_pmu)) + pmcr |= ARMV8_PMU_PMCR_LP; + + armv8pmu_pmcr_write(pmcr); if (cpu_pmu->has_branch_stack) armv8pmu_branch_reset(); @@ -1048,6 +1102,7 @@ static void __armv8pmu_probe_pmu(void *info) if (pmuver == 0xf || pmuver == 0) return; + cpu_pmu->pmuver = pmuver; probe->present = true; /* Read the nb of CNTx counters supported from PMNC */ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 84460cc91ad7..98a43a577e8c 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -97,6 +97,7 @@ struct arm_pmu { struct pmu pmu; cpumask_t supported_cpus; char *name; + int pmuver; irqreturn_t (*handle_irq)(struct arm_pmu *pmu); void (*enable)(struct perf_event *event); void (*disable)(struct perf_event *event); -- Gitee From c88f150849b7ef9a1169e7dbe2348eb3316bfe10 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 23 May 2023 14:44:24 +0800 Subject: [PATCH 034/173] perf: hisi: Extract hisi_pmu_init [Upstream commit e500405dd15d956790859fa532c64d8186445372] Extract the initialization code of hisi_pmu->pmu into a function Signed-off-by: Chen Jun Link: https://lore.kernel.org/r/20220516131601.48383-1-chenjun102@huawei.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 16 +-------------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 16 +-------------- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 20 +++++-------------- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 16 +-------------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 18 +++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 ++ drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 15 +------------- 7 files changed, 29 insertions(+), 74 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 2dd726c16461..2d5b485c32d8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -513,21 +513,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - ddrc_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = ddrc_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 6db57bfa3d1a..b0de08eee317 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -524,21 +524,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - hha_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, 
- .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = hha_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 962cad5d86c8..4f97020be4d5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -558,21 +558,11 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } - l3c_pmu->pmu = (struct pmu) { - .name = name, - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = l3c_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + /* + * CCL_ID is used to identify the L3C in the same SCCL which was + * used _UID by mistake. + */ + hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index cec593621fa9..379bd796d8d0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -501,21 +501,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - pa_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = pa_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; - + hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 02910e93cf57..57db7aec8059 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -538,4 +538,22 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +void hisi_pmu_init(struct pmu *pmu, const char *name, + const struct attribute_group **attr_groups, struct module *module) +{ + pmu->name = name; + pmu->module = module; + pmu->task_ctx_nr = perf_invalid_context; + pmu->event_init = hisi_uncore_pmu_event_init; + pmu->pmu_enable = hisi_uncore_pmu_enable; + pmu->pmu_disable = hisi_uncore_pmu_disable; + pmu->add = hisi_uncore_pmu_add; + pmu->del = hisi_uncore_pmu_del; + pmu->start = hisi_uncore_pmu_start; + pmu->stop = hisi_uncore_pmu_stop; + pmu->read = hisi_uncore_pmu_read; + pmu->attr_groups = attr_groups; +} +EXPORT_SYMBOL_GPL(hisi_pmu_init); + MODULE_LICENSE("GPL v2"); diff --git 
a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index e97a8f1fd46f..73337f6ae51b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -128,4 +128,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_pmu_init(struct pmu *pmu, const char *name, + const struct attribute_group **attr_groups, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 46be312fa126..5554c6ac5b3a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -445,20 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) return ret; } - sllc_pmu->pmu = (struct pmu) { - .module = THIS_MODULE, - .task_ctx_nr = perf_invalid_context, - .event_init = hisi_uncore_pmu_event_init, - .pmu_enable = hisi_uncore_pmu_enable, - .pmu_disable = hisi_uncore_pmu_disable, - .add = hisi_uncore_pmu_add, - .del = hisi_uncore_pmu_del, - .start = hisi_uncore_pmu_start, - .stop = hisi_uncore_pmu_stop, - .read = hisi_uncore_pmu_read, - .attr_groups = sllc_pmu->pmu_events.attr_groups, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, - }; + hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE); ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { -- Gitee From 182e4888d6758522dfa9ea109339d2025013dacf Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 23 May 2023 14:44:25 +0800 Subject: [PATCH 035/173] drivers/perf: hisi: Advertise the PERF_PMU_CAP_NO_EXCLUDE capability [Upstream commit 7f95da9d2dc4c20bb374c281ceb8fa40b6208f4b] The initialization of pmu::capabilities was missed when the initialization code of hisi_pmu->pmu was extracted into a function. The HiSilicon uncore PMU counters do not support context exclusion, so we have to advertise the PERF_PMU_CAP_NO_EXCLUDE capability. This ensures that perf will prevent us from handling events where any exclusion flags are set. Signed-off-by: Junhao He Link: https://lore.kernel.org/r/20230119100307.3660-2-hejunhao3@huawei.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 57db7aec8059..37c81b45355e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -553,6 +553,7 @@ void hisi_pmu_init(struct pmu *pmu, const char *name, pmu->stop = hisi_uncore_pmu_stop; pmu->read = hisi_uncore_pmu_read; pmu->attr_groups = attr_groups; + pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } EXPORT_SYMBOL_GPL(hisi_pmu_init); -- Gitee From 60b74c041bb22b63011570842cd42262046104a1 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 23 May 2023 14:44:26 +0800 Subject: [PATCH 036/173] drivers/perf: hisi: Simplify the parameters of hisi_pmu_init() [Upstream commit 053b5579dacfc5763dda0c073ee14147421d32d7] Use "hisi_pmu" to simplify the parameter list for the hisi_pmu_init() function.
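The effect on each caller is mechanical, shown here for the ddrc driver (taken from the hunks below; the other drivers change identically):

	/* before */
	hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE);
	/* after */
	hisi_pmu_init(ddrc_pmu, name, THIS_MODULE);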
Signed-off-by: Junhao He Link: https://lore.kernel.org/r/20230119100307.3660-3-hejunhao3@huawei.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 8 +++++--- drivers/perf/hisilicon/hisi_uncore_pmu.h | 4 ++-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 2d5b485c32d8..3edbecbb3751 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -513,7 +513,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - hisi_pmu_init(&ddrc_pmu->pmu, name, ddrc_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(ddrc_pmu, name, THIS_MODULE); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index b0de08eee317..51cbafb3ee8d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -524,7 +524,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(&hha_pmu->pmu, name, hha_pmu->pmu_events.attr_groups, THIS_MODULE); + hisi_pmu_init(hha_pmu, name, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 4f97020be4d5..4d79d40ee0fa 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -562,7 +562,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) * CCL_ID is used to identify the L3C in the same SCCL which was * used _UID by mistake. 
 */
-	hisi_pmu_init(&l3c_pmu->pmu, name, l3c_pmu->pmu_events.attr_groups, THIS_MODULE);
+	hisi_pmu_init(l3c_pmu, name, THIS_MODULE);
 	ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
 	if (ret) {
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index 379bd796d8d0..f5f32b61f032 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -501,7 +501,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
-	hisi_pmu_init(&pa_pmu->pmu, name, pa_pmu->pmu_events.attr_groups, THIS_MODULE);
+	hisi_pmu_init(pa_pmu, name, THIS_MODULE);
 	ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
 	if (ret) {
 		dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 37c81b45355e..ce29e3921691 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -538,9 +538,11 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 }
 EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu);
-void hisi_pmu_init(struct pmu *pmu, const char *name,
-		const struct attribute_group **attr_groups, struct module *module)
+void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name,
+		struct module *module)
 {
+	struct pmu *pmu = &hisi_pmu->pmu;
+
 	pmu->name = name;
 	pmu->module = module;
@@ -552,7 +554,7 @@ void hisi_pmu_init(struct pmu *pmu, const char *name,
 	pmu->start = hisi_uncore_pmu_start;
 	pmu->stop = hisi_uncore_pmu_stop;
 	pmu->read = hisi_uncore_pmu_read;
-	pmu->attr_groups = attr_groups;
+	pmu->attr_groups = hisi_pmu->pmu_events.attr_groups;
 	pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE;
 }
 EXPORT_SYMBOL_GPL(hisi_pmu_init);
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 73337f6ae51b..8ffb5436dabd 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -128,6 +128,6 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
 int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
 			     struct platform_device *pdev);
-void hisi_pmu_init(struct pmu *pmu, const char *name,
-		const struct attribute_group **attr_groups, struct module *module);
+void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name,
+		struct module *module);
 #endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 5554c6ac5b3a..20d44f575008 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -445,7 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
-	hisi_pmu_init(&sllc_pmu->pmu, name, sllc_pmu->pmu_events.attr_groups, THIS_MODULE);
+	hisi_pmu_init(sllc_pmu, name, THIS_MODULE);
 	ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
 	if (ret) {
--
Gitee

From cc3000dd478857b5ad2fe595bbcd2541873a7db3 Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Tue, 23 May 2023 14:44:27 +0800
Subject: [PATCH 037/173] drivers/perf: hisi: Extract initialization of
 "cpa_pmu->pmu"

[Upstream commit e126f6f42f89baee09e088ab6bc48f83ac3a0eae]

Use the hisi_pmu_init() function to simplify the initialization of
"cpa_pmu->pmu".
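For illustration, the probe path goes from open-coding the common perf
callbacks to a single helper call (an editorial sketch; the exact hunk
is in the diff below):

	/* Before: every driver fills in its struct pmu by hand. */
	cpa_pmu->pmu = (struct pmu) {
		.name = name,
		.module = THIS_MODULE,
		.event_init = hisi_uncore_pmu_event_init,
		/* ...the same boilerplate repeated per driver... */
	};

	/* After: the shared helper sets up all the common fields. */
	hisi_pmu_init(cpa_pmu, name, THIS_MODULE);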
Signed-off-by: Junhao He
Link: https://lore.kernel.org/r/20230119100307.3660-4-hejunhao3@huawei.com
Signed-off-by: Will Deacon
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
index 09839dae9b7c..1a75351013f1 100644
--- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -319,21 +319,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
 	if (!name)
 		return -ENOMEM;
-	cpa_pmu->pmu = (struct pmu) {
-		.name = name,
-		.module = THIS_MODULE,
-		.task_ctx_nr = perf_invalid_context,
-		.event_init = hisi_uncore_pmu_event_init,
-		.pmu_enable = hisi_uncore_pmu_enable,
-		.pmu_disable = hisi_uncore_pmu_disable,
-		.add = hisi_uncore_pmu_add,
-		.del = hisi_uncore_pmu_del,
-		.start = hisi_uncore_pmu_start,
-		.stop = hisi_uncore_pmu_stop,
-		.read = hisi_uncore_pmu_read,
-		.attr_groups = cpa_pmu->pmu_events.attr_groups,
-		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
-	};
+	hisi_pmu_init(cpa_pmu, name, THIS_MODULE);
 	/* Power Management should be disabled before using CPA PMU. */
 	hisi_cpa_pmu_disable_pm(cpa_pmu);
--
Gitee

From 18f8e2ce50b1f4ecd8e99af10c93ad17cb2fee94 Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Tue, 23 May 2023 14:44:28 +0800
Subject: [PATCH 038/173] drivers/perf: hisi: Remove redundant initialization
 of pmu->name

[Upstream commit 25d8c25025a46e7621edde2eb6d5f55c6d29ee86]

"pmu->name" is initialized by the perf_pmu_register() function, so
remove the redundant initialization in hisi_pmu_init().

Signed-off-by: Junhao He
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +---
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 3 +--
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +-
 8 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
index 1a75351013f1..9322fc3dac07 100644
--- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -319,7 +319,7 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
 	if (!name)
 		return -ENOMEM;
-	hisi_pmu_init(cpa_pmu, name, THIS_MODULE);
+	hisi_pmu_init(cpa_pmu, THIS_MODULE);
 	/* Power Management should be disabled before using CPA PMU.
*/ hisi_cpa_pmu_disable_pm(cpa_pmu); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 3edbecbb3751..df3a38598153 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -513,7 +513,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, ddrc_pmu->index_id); - hisi_pmu_init(ddrc_pmu, name, THIS_MODULE); + hisi_pmu_init(ddrc_pmu, THIS_MODULE); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 51cbafb3ee8d..10466ac4259e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -524,7 +524,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(hha_pmu, name, THIS_MODULE); + hisi_pmu_init(hha_pmu, THIS_MODULE); ret = perf_pmu_register(&hha_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 4d79d40ee0fa..14f917791944 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -562,7 +562,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) * CCL_ID is used to identify the L3C in the same SCCL which was * used _UID by mistake. */ - hisi_pmu_init(l3c_pmu, name, THIS_MODULE); + hisi_pmu_init(l3c_pmu, THIS_MODULE); ret = perf_pmu_register(&l3c_pmu->pmu, name, -1); if (ret) { diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index f5f32b61f032..bce7f7d7cd09 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -501,7 +501,7 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) return ret; } - hisi_pmu_init(pa_pmu, name, THIS_MODULE); + hisi_pmu_init(pa_pmu, THIS_MODULE); ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index ce29e3921691..db6936da22f1 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -538,12 +538,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module) +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; - pmu->name = name; pmu->module = module; pmu->task_ctx_nr = perf_invalid_context; pmu->event_init = hisi_uncore_pmu_event_init; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 8ffb5436dabd..aca7d9998bce 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -128,6 +128,5 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); -void hisi_pmu_init(struct hisi_pmu *hisi_pmu, const char *name, - struct module *module); +void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 
20d44f575008..771d98ef8712 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -445,7 +445,7 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
-	hisi_pmu_init(sllc_pmu, name, THIS_MODULE);
+	hisi_pmu_init(sllc_pmu, THIS_MODULE);
 	ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
 	if (ret) {
--
Gitee

From f20d8ffce1afab6d185cc40c0c3d416abdccfeb7 Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Fri, 25 Aug 2023 11:34:34 +0800
Subject: [PATCH 039/173] drivers/perf: hisi: Add support for HiSilicon UC PMU
 driver

[Upstream commit 312eca95e28d40694aee7c78a18ac0ecfd6d8159]

On the HiSilicon Hip09 platform, there are 4 UC (unified cache) modules
on each chip CCL (CPU Cluster). The UC is a cache that provides
coherence between the NUMA and UMA domains; it is located between the
L2 cache and the memory system. Many PMU events are supported.

Let's support the UC PMU driver using the HiSilicon uncore PMU
framework.

* rd_req_en: the abbreviation of "read request tracetag enable"; it
  allows the user to count only read operations. Details are listed in
  the hisi-pmu document at
  Documentation/admin-guide/perf/hisi-pmu.rst

* srcid_en & srcid: allow the user to filter statistics for a specific
  CPU/ICL by srcid. srcid_en depends on rd_req_en being enabled.

* uring_channel: allows the user to filter statistics by the specified
  tx request uring channel. uring_channel is only supported by events
  [0x47 ~ 0x59].

Signed-off-by: Junhao He
Reviewed-by: Yicong Yang
Reviewed-by: Jonathan Cameron
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20230615125926.29832-3-hejunhao3@huawei.com
Signed-off-by: Will Deacon
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/Makefile | 2 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 +
 drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 578 ++++++++++++++++++++
 4 files changed, 588 insertions(+), 2 deletions(-)
 create mode 100644 drivers/perf/hisilicon/hisi_uncore_uc_pmu.c

diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index fc7ed8c6cc0f..ba35e812caab 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
 	hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
-	hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
+	hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o
 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
 obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index db6936da22f1..67989b156fa3 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -20,7 +20,6 @@
 #include "hisi_uncore_pmu.h"
-#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
 #define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
 /*
@@ -226,6 +225,9 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
 	hwc->idx = -1;
 	hwc->config_base = event->attr.config;
+	if (hisi_pmu->ops->check_filter && hisi_pmu->ops->check_filter(event))
+		return -EINVAL;
+
 	/* Enforce to use the same CPU for all events in this PMU */
 	event->cpu = hisi_pmu->on_cpu;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index aca7d9998bce..7e074f8cda1a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++
b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -42,9 +42,15 @@ return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ } +#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) + +#define HISI_PMU_EVTYPE_BITS 8 +#define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS) + struct hisi_pmu; struct hisi_uncore_ops { + int (*check_filter)(struct perf_event *event); void (*write_evtype)(struct hisi_pmu *, int, u32); int (*get_event_idx)(struct perf_event *); u64 (*read_counter)(struct hisi_pmu *, struct hw_perf_event *); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c new file mode 100644 index 000000000000..63da05e5831c --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC UC (unified cache) uncore Hardware event counters support + * + * Copyright (C) 2023 HiSilicon Limited + * + * This code is based on the uncore PMUs like hisi_uncore_l3c_pmu. + */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by UC PMU */ +static enum cpuhp_state hisi_uc_pmu_online; + +/* UC register definition */ +#define HISI_UC_INT_MASK_REG 0x0800 +#define HISI_UC_INT_STS_REG 0x0808 +#define HISI_UC_INT_CLEAR_REG 0x080c +#define HISI_UC_TRACETAG_CTRL_REG 0x1b2c +#define HISI_UC_TRACETAG_REQ_MSK GENMASK(9, 7) +#define HISI_UC_TRACETAG_MARK_EN BIT(0) +#define HISI_UC_TRACETAG_REQ_EN (HISI_UC_TRACETAG_MARK_EN | BIT(2)) +#define HISI_UC_TRACETAG_SRCID_EN BIT(3) +#define HISI_UC_SRCID_CTRL_REG 0x1b40 +#define HISI_UC_SRCID_MSK GENMASK(14, 1) +#define HISI_UC_EVENT_CTRL_REG 0x1c00 +#define HISI_UC_EVENT_TRACETAG_EN BIT(29) +#define HISI_UC_EVENT_URING_MSK GENMASK(28, 27) +#define HISI_UC_EVENT_GLB_EN BIT(26) +#define HISI_UC_VERSION_REG 0x1cf0 +#define HISI_UC_EVTYPE_REGn(n) (0x1d00 + (n) * 4) +#define HISI_UC_EVTYPE_MASK GENMASK(7, 0) +#define HISI_UC_CNTR_REGn(n) (0x1e00 + (n) * 8) + +#define HISI_UC_NR_COUNTERS 0x8 +#define HISI_UC_V2_NR_EVENTS 0xFF +#define HISI_UC_CNTR_REG_BITS 64 + +#define HISI_UC_RD_REQ_TRACETAG 0x4 +#define HISI_UC_URING_EVENT_MIN 0x47 +#define HISI_UC_URING_EVENT_MAX 0x59 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(rd_req_en, config1, 0, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(uring_channel, config1, 5, 4); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid, config1, 19, 6); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_en, config1, 20, 20); + +static int hisi_uc_pmu_check_filter(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + + if (hisi_get_srcid_en(event) && !hisi_get_rd_req_en(event)) { + dev_err(uc_pmu->dev, + "rcid_en depends on rd_req_en being enabled!\n"); + return -EINVAL; + } + + if (!hisi_get_uring_channel(event)) + return 0; + + if ((HISI_GET_EVENTID(event) < HISI_UC_URING_EVENT_MIN) || + (HISI_GET_EVENTID(event) > HISI_UC_URING_EVENT_MAX)) + dev_warn(uc_pmu->dev, + "Only events: [%#x ~ %#x] support channel filtering!", + HISI_UC_URING_EVENT_MIN, HISI_UC_URING_EVENT_MAX); + + return 0; +} + +static void hisi_uc_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* The request-type has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == HISI_UC_RD_REQ_TRACETAG) + return; + + /* Set request-type for tracetag, only read request is supported! 
*/ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val |= FIELD_PREP(HISI_UC_TRACETAG_REQ_MSK, HISI_UC_RD_REQ_TRACETAG); + val |= HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_clear_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_rd_req_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the request-type tracetag has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_REQ_MSK, val) == 0) + return; + + /* Clear request-type */ + val &= ~HISI_UC_TRACETAG_REQ_MSK; + val &= ~HISI_UC_TRACETAG_REQ_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); +} + +static void hisi_uc_pmu_config_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been configured */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val)) + return; + + /* Enable source id tracetag */ + val |= HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + val |= FIELD_PREP(HISI_UC_SRCID_MSK, hisi_get_srcid(event)); + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + + /* Depend on request-type tracetag enabled */ + hisi_uc_pmu_config_req_tracetag(event); +} + +static void hisi_uc_pmu_clear_srcid_tracetag(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + if (!hisi_get_srcid_en(event)) + return; + + val = readl(uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + /* Do nothing, the source id has been cleaned up */ + if (FIELD_GET(HISI_UC_TRACETAG_SRCID_EN, val) == 0) + return; + + hisi_uc_pmu_clear_req_tracetag(event); + + /* Disable source id tracetag */ + val &= ~HISI_UC_TRACETAG_SRCID_EN; + writel(val, uc_pmu->base + HISI_UC_TRACETAG_CTRL_REG); + + val = readl(uc_pmu->base + HISI_UC_SRCID_CTRL_REG); + val &= ~HISI_UC_SRCID_MSK; + writel(val, uc_pmu->base + HISI_UC_SRCID_CTRL_REG); +} + +static void hisi_uc_pmu_config_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 uring_channel = hisi_get_uring_channel(event); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (uring_channel == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been configured */ + if (uring_channel == FIELD_GET(HISI_UC_EVENT_URING_MSK, val)) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + val |= FIELD_PREP(HISI_UC_EVENT_URING_MSK, uring_channel); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_clear_uring_channel(struct perf_event *event) +{ + struct hisi_pmu *uc_pmu = to_hisi_pmu(event->pmu); + u32 val; + + /* Do nothing if not being set or is set explicitly to zero (default) */ + if (hisi_get_uring_channel(event) == 0) + return; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + + /* Do nothing, the uring_channel has been cleaned up */ + if (FIELD_GET(HISI_UC_EVENT_URING_MSK, val) == 0) + return; + + val &= ~HISI_UC_EVENT_URING_MSK; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + 
hisi_uc_pmu_config_uring_channel(event); + hisi_uc_pmu_config_req_tracetag(event); + hisi_uc_pmu_config_srcid_tracetag(event); +} + +static void hisi_uc_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 == 0) + return; + + hisi_uc_pmu_clear_srcid_tracetag(event); + hisi_uc_pmu_clear_req_tracetag(event); + hisi_uc_pmu_clear_uring_channel(event); +} + +static void hisi_uc_pmu_write_evtype(struct hisi_pmu *uc_pmu, int idx, u32 type) +{ + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. + */ + val = readl(uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); + val &= ~(HISI_UC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, uc_pmu->base + HISI_UC_EVTYPE_REGn(idx / 4)); +} + +static void hisi_uc_pmu_start_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_stop_counters(struct hisi_pmu *uc_pmu) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~HISI_UC_EVENT_GLB_EN; + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_enable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Enable counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static void hisi_uc_pmu_disable_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + /* Clear counter index */ + val = readl(uc_pmu->base + HISI_UC_EVENT_CTRL_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_EVENT_CTRL_REG); +} + +static u64 hisi_uc_pmu_read_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + return readq(uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_write_counter(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, uc_pmu->base + HISI_UC_CNTR_REGn(hwc->idx)); +} + +static void hisi_uc_pmu_enable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val &= ~(1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static void hisi_uc_pmu_disable_counter_int(struct hisi_pmu *uc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(uc_pmu->base + HISI_UC_INT_MASK_REG); + val |= (1 << hwc->idx); + writel(val, uc_pmu->base + HISI_UC_INT_MASK_REG); +} + +static u32 hisi_uc_pmu_get_int_status(struct hisi_pmu *uc_pmu) +{ + return readl(uc_pmu->base + HISI_UC_INT_STS_REG); +} + +static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) +{ + writel(1 << idx, uc_pmu->base + HISI_UC_INT_CLEAR_REG); +} + +static int hisi_uc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + /* + * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to + * identify the topology information of UC PMU devices in the chip. + * They have some CCLs per SCCL and then 4 UC PMU per CCL. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &uc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", + &uc_pmu->ccl_id)) { + dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", + &uc_pmu->sub_id)) { + dev_err(&pdev->dev, "Can not read sub-id!\n"); + return -EINVAL; + } + + uc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(uc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for uc_pmu resource\n"); + return PTR_ERR(uc_pmu->base); + } + + uc_pmu->identifier = readl(uc_pmu->base + HISI_UC_VERSION_REG); + + return 0; +} + +static struct attribute *hisi_uc_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(rd_req_en, "config1:0-0"), + HISI_PMU_FORMAT_ATTR(uring_channel, "config1:4-5"), + HISI_PMU_FORMAT_ATTR(srcid, "config1:6-19"), + HISI_PMU_FORMAT_ATTR(srcid_en, "config1:20-20"), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_format_group = { + .name = "format", + .attrs = hisi_uc_pmu_format_attr, +}; + +static struct attribute *hisi_uc_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(sq_time, 0x00), + HISI_PMU_EVENT_ATTR(pq_time, 0x01), + HISI_PMU_EVENT_ATTR(hbm_time, 0x02), + HISI_PMU_EVENT_ATTR(iq_comp_time_cring, 0x03), + HISI_PMU_EVENT_ATTR(iq_comp_time_uring, 0x05), + HISI_PMU_EVENT_ATTR(cpu_rd, 0x10), + HISI_PMU_EVENT_ATTR(cpu_rd64, 0x17), + HISI_PMU_EVENT_ATTR(cpu_rs64, 0x19), + HISI_PMU_EVENT_ATTR(cpu_mru, 0x1a), + HISI_PMU_EVENT_ATTR(cycles, 0x9c), + HISI_PMU_EVENT_ATTR(spipe_hit, 0xb3), + HISI_PMU_EVENT_ATTR(hpipe_hit, 0xdb), + HISI_PMU_EVENT_ATTR(cring_rxdat_cnt, 0xfa), + HISI_PMU_EVENT_ATTR(cring_txdat_cnt, 0xfb), + HISI_PMU_EVENT_ATTR(uring_rxdat_cnt, 0xfc), + HISI_PMU_EVENT_ATTR(uring_txdat_cnt, 0xfd), + NULL +}; + +static const struct attribute_group hisi_uc_pmu_events_group = { + .name = "events", + .attrs = hisi_uc_pmu_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { + .attrs = hisi_uc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_uc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_uc_pmu_identifier_attrs[] = { + &hisi_uc_pmu_identifier_attr.attr, + NULL +}; + +static const struct attribute_group hisi_uc_pmu_identifier_group = { + .attrs = hisi_uc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { + &hisi_uc_pmu_format_group, + &hisi_uc_pmu_events_group, + &hisi_uc_pmu_cpumask_attr_group, + &hisi_uc_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_uc_pmu_ops = { + .check_filter = hisi_uc_pmu_check_filter, + .write_evtype = hisi_uc_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_uc_pmu_start_counters, + .stop_counters = hisi_uc_pmu_stop_counters, + .enable_counter = hisi_uc_pmu_enable_counter, + .disable_counter = hisi_uc_pmu_disable_counter, + .enable_counter_int = hisi_uc_pmu_enable_counter_int, + .disable_counter_int = hisi_uc_pmu_disable_counter_int, + .write_counter = hisi_uc_pmu_write_counter, + .read_counter = hisi_uc_pmu_read_counter, + .get_int_status = 
hisi_uc_pmu_get_int_status, + .clear_int_status = hisi_uc_pmu_clear_int_status, + .enable_filter = hisi_uc_pmu_enable_filter, + .disable_filter = hisi_uc_pmu_disable_filter, +}; + +static int hisi_uc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *uc_pmu) +{ + int ret; + + ret = hisi_uc_pmu_init_data(pdev, uc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(uc_pmu, pdev); + if (ret) + return ret; + + uc_pmu->pmu_events.attr_groups = hisi_uc_pmu_attr_groups; + uc_pmu->check_event = HISI_UC_EVTYPE_MASK; + uc_pmu->ops = &hisi_uncore_uc_pmu_ops; + uc_pmu->counter_bits = HISI_UC_CNTR_REG_BITS; + uc_pmu->num_counters = HISI_UC_NR_COUNTERS; + uc_pmu->dev = &pdev->dev; + uc_pmu->on_cpu = -1; + + return 0; +} + +static void hisi_uc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_uc_pmu_online, hotplug_node); +} + +static void hisi_uc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_uc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *uc_pmu; + char *name; + int ret; + + uc_pmu = devm_kzalloc(&pdev->dev, sizeof(*uc_pmu), GFP_KERNEL); + if (!uc_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, uc_pmu); + + ret = hisi_uc_pmu_dev_probe(pdev, uc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", + uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_uc_pmu_online, &uc_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Error registering hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_remove_cpuhp_instance, + &uc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(uc_pmu, THIS_MODULE); + + ret = perf_pmu_register(&uc_pmu->pmu, name, -1); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, + hisi_uc_pmu_unregister_pmu, + &uc_pmu->pmu); +} + +static const struct acpi_device_id hisi_uc_pmu_acpi_match[] = { + { "HISI0291", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_uc_pmu_acpi_match); + +static struct platform_driver hisi_uc_pmu_driver = { + .driver = { + .name = "hisi_uc_pmu", + .acpi_match_table = hisi_uc_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. 
+	 */
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_uc_pmu_probe,
+};
+
+static int __init hisi_uc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/hisi/uc:online",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret < 0) {
+		pr_err("UC PMU: Error setup hotplug, ret = %d\n", ret);
+		return ret;
+	}
+	hisi_uc_pmu_online = ret;
+
+	ret = platform_driver_register(&hisi_uc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(hisi_uc_pmu_online);
+
+	return ret;
+}
+module_init(hisi_uc_pmu_module_init);
+
+static void __exit hisi_uc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_uc_pmu_driver);
+	cpuhp_remove_multi_state(hisi_uc_pmu_online);
+}
+module_exit(hisi_uc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Junhao He ");
--
Gitee

From fd4b41ac9d006baddb7c8ca42680cd8262d10f28 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 11 Apr 2025 19:13:51 +0800
Subject: [PATCH 040/173] drivers/perf: hisi: Define a symbol namespace for
 HiSilicon Uncore PMUs

[Upstream commit 41729809ac8504abb7ac757105c6db2c2fbbc466]

The HiSilicon Uncore PMU framework implements some common functions and
exports them to the drivers. Use a specific HISI_PMU namespace for the
exported symbols to avoid polluting the generic namespace.

Reviewed-by: Jonathan Cameron
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 1 +
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 +
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 1 +
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 1 +
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 1 +
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 36 +++++++++----------
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 1 +
 drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 1 +
 8 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
index 9322fc3dac07..e69ceea8463b 100644
--- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -391,6 +391,7 @@ static void __exit hisi_cpa_pmu_module_exit(void)
 }
 module_exit(hisi_cpa_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
 MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Qi Liu ");
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index df3a38598153..a54711ba491d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -587,6 +587,7 @@ static void __exit hisi_ddrc_pmu_module_exit(void)
 }
 module_exit(hisi_ddrc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
 MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Shaokun Zhang ");
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 10466ac4259e..1df877faa68e 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -587,6 +587,7 @@ static void __exit hisi_hha_pmu_module_exit(void)
 }
 module_exit(hisi_hha_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
 MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Shaokun Zhang ");
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 14f917791944..15298249f096 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -625,6 +625,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index bce7f7d7cd09..910279b93fc6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -573,6 +573,7 @@ static void __exit hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 67989b156fa3..87a42828e743 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -48,7 +48,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU"); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -60,7 +60,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sprintf(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU"); static bool hisi_validate_event_group(struct perf_event *event) { @@ -110,7 +110,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU"); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, @@ -120,7 +120,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, return snprintf(page, PAGE_SIZE, "0x%08x\n", hisi_pmu->identifier); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU"); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -179,7 +179,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU"); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -233,7 +233,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU"); /* * Set the counter to count the event that we're interested in, @@ -287,7 +287,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU"); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -308,7 +308,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); 
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU"); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -331,7 +331,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU"); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -348,7 +348,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU"); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -371,7 +371,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU"); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -383,14 +383,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU"); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU"); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -403,7 +403,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU"); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -411,7 +411,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU"); /* @@ -505,7 +505,7 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU"); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -538,7 +538,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { @@ -557,6 +557,6 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_GPL(hisi_pmu_init); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 771d98ef8712..a2e09822ac41 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -511,6 +511,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); +MODULE_IMPORT_NS("HISI_PMU"); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c 
b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
index 63da05e5831c..d90bb8c877b8 100644
--- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
@@ -573,6 +573,7 @@ static void __exit hisi_uc_pmu_module_exit(void)
 }
 module_exit(hisi_uc_pmu_module_exit);
+MODULE_IMPORT_NS("HISI_PMU");
 MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Junhao He ");
--
Gitee

From 93481ef09d93b70a0f57aab990be8ffad5bd2dd3 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 11 Apr 2025 19:13:52 +0800
Subject: [PATCH 041/173] drivers/perf: hisi: Don't update the associated_cpus
 on CPU offline

[Upstream commit f2368a209a713267b68f7a4906a5012a29925410]

Events are scheduled on the CPU hisi_pmu::on_cpu, which is selected
from the intersection of hisi_pmu::associated_cpus and the online
CPUs. So associated_cpus does not need to be kept in sync with the
online CPUs. This saves one update operation when an associated CPU
goes offline.

Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 87a42828e743..f9044f2ecb55 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -514,9 +514,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	cpumask_t pmu_online_cpus;
 	unsigned int target;
-	if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
-		return 0;
-
 	/* Nothing to do if this CPU doesn't own the PMU */
 	if (hisi_pmu->on_cpu != cpu)
 		return 0;
--
Gitee

From c12cebba0ed36092496d8dc5758fe1028b76740d Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 11 Apr 2025 19:13:53 +0800
Subject: [PATCH 042/173] drivers/perf: hisi: Migrate to one online CPU if no
 associated one online

[Upstream commit 83037a47d3aa5f3e35b0c02433a87806e9c34438]

If the selected CPU hisi_pmu::on_cpu goes offline, the driver will
select a new online CPU from hisi_pmu::associated_cpus or, if no
online CPU is found there, the PMU context won't be migrated. However,
for uncore PMUs the associated CPUs are just a preference and it also
works to schedule the events on any online CPU. So add a fallback to
choose an online CPU if no associated CPU is found.

Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index f9044f2ecb55..068422cf0916 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -525,6 +525,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
 		    cpu_online_mask);
 	target = cpumask_any_but(&pmu_online_cpus, cpu);
+
+	if (target >= nr_cpu_ids)
+		target = cpumask_any_but(cpu_online_mask, cpu);
+
 	if (target >= nr_cpu_ids)
 		return 0;
--
Gitee

From 11972002d3fafca40a6e4c4f19a7bc218fe8fb8b Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 11 Apr 2025 19:13:54 +0800
Subject: [PATCH 043/173] drivers/perf: hisi: Refactor the detection of
 associated CPUs

[Upstream commit 6cd137088fdf02488ab29d11c64f66ac650ec1ad]

There are two types of PMU supported currently:

1) PMUs located on an SCCL (Super CPU Cluster [1]), associated with a
   certain CCL (CPU cluster [1]) (e.g. the L3C PMU) or not (e.g. the
DDRC PMU)
2) PMUs located on the SICL (Super I/O Cluster [1]), which have no
   association with a certain CPU topology (e.g. the CPA PMU)

Currently the associated CPUs of a PMU are detected in the cpuhp
online callback as below:
- for type 1), the CPUs matching the PMU's sccl_id and ccl_id
- for type 2), the PMU's sccl_id is -1 and all online CPUs will be
  associated

Since uncore PMUs are not bound to a certain CPU context and event
counting can be started by any online CPU, the associated CPUs are
just a preference. The following disadvantages are observed in the
current implementation:
- the PMU cannot be used if its associated CPUs are offline
- SICL PMUs are implicitly associated to all online CPUs, without any
  consideration of locality

So refactor the way we detect the associated CPUs in the following
aspects:
- add a clear definition of hisi_pmu::associated_cpus
- initialize hisi_pmu::on_cpu based on locality if no associated CPU
  is found, otherwise update it from the associated CPUs
- drop the detection with a sccl_id of -1 for SICL PMUs

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/perf/hisi-pmu.rst?h=v6.12-rc1

Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 22 +++++++++++++++-------
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 +++++-
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 068422cf0916..70aa27903018 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -467,10 +467,6 @@ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
 {
 	int sccl_id, ccl_id;
-	/* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */
-	if (hisi_pmu->sccl_id == -1)
-		return true;
-
 	if (hisi_pmu->ccl_id == -1) {
 		/* If CCL_ID is -1, the PMU only shares the same SCCL */
 		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
@@ -488,13 +484,25 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 	struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node);
-	if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu))
+	/*
+	 * If the CPU is not associated to PMU, initialize the hisi_pmu->on_cpu
+	 * based on the locality if it hasn't been initialized yet. For PMUs
+	 * do have associated CPUs, it'll be updated later.
+	 */
+	if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) {
+		if (hisi_pmu->on_cpu != -1)
+			return 0;
+
+		hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev));
+		WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
 		return 0;
+	}
 	cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus);
-	/* If another CPU is already managing this PMU, simply return. */
-	if (hisi_pmu->on_cpu != -1)
+	/* If another associated CPU is already managing this PMU, simply return. */
+	if (hisi_pmu->on_cpu != -1 &&
+	    cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus))
 		return 0;
 	/* Use this CPU in cpumask for event counting */
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 7e074f8cda1a..2d9a341910ac 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -86,7 +86,11 @@ struct hisi_pmu {
 	const struct hisi_uncore_ops *ops;
 	const struct hisi_pmu_dev_info *dev_info;
 	struct hisi_pmu_hwevents pmu_events;
-	/* associated_cpus: All CPUs associated with the PMU */
+	/*
+	 * CPUs associated to the PMU and are preferred to use for counting.
+	 * Could be empty if PMU has no association (e.g. PMU on SICL), in
+	 * which case any online CPU will be used.
+	 */
 	cpumask_t associated_cpus;
 	/* CPU used for counting */
 	int on_cpu;
--
Gitee

From 8b6ab29c9f1773b50a3a4dbb57c3c28689246e8b Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 11 Apr 2025 19:13:55 +0800
Subject: [PATCH 044/173] drivers/perf: hisi: Extract topology information to
 a separate structure

[Upstream commit c192026ceea793a73d4b54ed46dc1cfeb21d3853]

HiSilicon Uncore PMUs are identified by the IDs of the topology element
on which the PMUs are located. Add a new separate struct
hisi_pmu_topology to encapsulate this information. Add additional
documentation on the meaning of each ID.

- make sccl_id and sicl_id into a union since they're exclusive. They
  can also be accessed via scl_id if the SICL/SCCL distinction is not
  relevant
- make index_id and sub_id signed so that -1 may be used to indicate
  that the PMU doesn't have this topology element or that it could not
  be retrieved

This patch should have no functional changes.

Reviewed-by: Jonathan Cameron
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 12 +++---
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 18 ++++-----
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 12 +++---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 8 ++--
 drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c | 10 ++---
 .../perf/hisilicon/hisi_uncore_lpddrc_pmu.c | 12 +++---
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 14 +++----
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 7 ++--
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 38 +++++++++++++++----
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 10 ++---
 drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 11 +++---
 11 files changed, 87 insertions(+), 65 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
index e69ceea8463b..9cffe153b554 100644
--- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c
@@ -184,19 +184,19 @@ static int hisi_cpa_pmu_init_data(struct platform_device *pdev,
 				  struct hisi_pmu *cpa_pmu)
 {
 	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
-				     &cpa_pmu->sicl_id)) {
+				     &cpa_pmu->topo.sicl_id)) {
 		dev_err(&pdev->dev, "Can not read sicl-id\n");
 		return -EINVAL;
 	}
 	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
-				     &cpa_pmu->index_id)) {
+				     &cpa_pmu->topo.index_id)) {
 		dev_err(&pdev->dev, "Cannot read idx-id\n");
 		return -EINVAL;
 	}
-	cpa_pmu->ccl_id = -1;
-	cpa_pmu->sccl_id = -1;
+	cpa_pmu->topo.ccl_id = -1;
+	cpa_pmu->topo.sccl_id = -1;
 	cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cpa_pmu->base))
 		return PTR_ERR(cpa_pmu->base);
@@ -314,8 +314,8 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
-	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u",
-			      cpa_pmu->sicl_id, cpa_pmu->index_id);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d",
+			      cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id);
 	if (!name)
 		return -ENOMEM;
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index a54711ba491d..e649333232fe 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -304,18 +304,18 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
 	 * DDRC PMU, while SCCL_ID is in MPIDR[aff2].
*/ if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", - &ddrc_pmu->index_id)) { + &ddrc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Can not read ddrc channel-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->sccl_id)) { + &ddrc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->ccl_id = -1; + ddrc_pmu->topo.ccl_id = -1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res); @@ -327,7 +327,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->sub_id)) { + &ddrc_pmu->topo.sub_id)) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -505,13 +505,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u_%u", - ddrc_pmu->sccl_id, ddrc_pmu->index_id, - ddrc_pmu->sub_id); + "hisi_sccl%d_ddrc%d_%d", + ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id, + ddrc_pmu->topo.sub_id); else name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, - ddrc_pmu->index_id); + "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id, + ddrc_pmu->topo.index_id); hisi_pmu_init(ddrc_pmu, THIS_MODULE); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 1df877faa68e..61272f99ab80 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -301,7 +301,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->sccl_id)) { + &hha_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -311,7 +311,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * both "hisilicon, idx-id" as preference, if available. 
*/ if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->index_id)) { + &hha_pmu->topo.index_id)) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -319,10 +319,10 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - hha_pmu->index_id = id; + hha_pmu->topo.index_id = id; } /* HHA PMUs only share the same SCCL */ - hha_pmu->ccl_id = -1; + hha_pmu->topo.ccl_id = -1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); hha_pmu->base = devm_ioremap_resource(&pdev->dev, res); @@ -512,8 +512,8 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d", + hha_pmu->topo.sccl_id, hha_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 15298249f096..490a0a764297 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -361,13 +361,13 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->sccl_id)) { + &l3c_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->ccl_id)) { + &l3c_pmu->topo.ccl_id)) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } @@ -546,8 +546,8 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c index f414dc1736aa..669010383394 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c @@ -180,14 +180,12 @@ static int hisi_l3t_pmu_init_data(struct platform_device *pdev, * Use the SCCL_ID and CCL_ID to identify the L3T PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3t_pmu->sccl_id)) { + if (l3t_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read l3t sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3t_pmu->ccl_id)) { + if (l3t_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read l3t ccl-id!\n"); return -EINVAL; } @@ -322,7 +320,7 @@ static int hisi_l3t_pmu_probe(struct platform_device *pdev) if (ret) return ret; - if (device_property_read_u32(&pdev->dev, "hisilicon,index-id", &l3t_pmu->index_id)) { + if (l3t_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Can not read l3t index-id!\n"); return -EINVAL; } @@ -332,7 +330,7 @@ static int hisi_l3t_pmu_probe(struct platform_device *pdev) * used _UID by mistake. 
*/ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3t%u", - l3t_pmu->sccl_id, l3t_pmu->index_id); + l3t_pmu->topo.sccl_id, l3t_pmu->topo.index_id); l3t_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, diff --git a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c index 03a4bb1a9948..8157d888d3b8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c @@ -196,19 +196,17 @@ static int hisi_lpddrc_pmu_init_data(struct platform_device *pdev, * Use the SCCL_ID and LPDDRC channel ID to identify the * LPDDRC PMU, while SCCL_ID is in MPIDR[aff2]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", - &lpddrc_pmu->index_id)) { + if (lpddrc_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Can not read lpddrc channel-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &lpddrc_pmu->sccl_id)) { + if (lpddrc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read lpddrc sccl-id!\n"); return -EINVAL; } /* LPDDRC PMUs only share the same SCCL */ - lpddrc_pmu->ccl_id = -1; + lpddrc_pmu->topo.ccl_id = -1; lpddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(lpddrc_pmu->base)) { @@ -336,8 +334,8 @@ static int hisi_lpddrc_pmu_probe(struct platform_device *pdev) return ret; name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_lpddrc%u", lpddrc_pmu->sccl_id, - lpddrc_pmu->index_id); + "hisi_sccl%u_lpddrc%u", lpddrc_pmu->topo.sccl_id, + lpddrc_pmu->topo.index_id); lpddrc_pmu->pmu = (struct pmu) { .name = name, diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 910279b93fc6..ecafdad342ef 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -274,19 +274,19 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev, * to identify the PA PMU. 
 */
 	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
-				     &pa_pmu->sicl_id)) {
+				     &pa_pmu->topo.sicl_id)) {
 		dev_err(&pdev->dev, "Cannot read sicl-id!\n");
 		return -EINVAL;
 	}
 
 	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
-				     &pa_pmu->index_id)) {
+				     &pa_pmu->topo.index_id)) {
 		dev_err(&pdev->dev, "Cannot read idx-id!\n");
 		return -EINVAL;
 	}
 
-	pa_pmu->ccl_id = -1;
-	pa_pmu->sccl_id = -1;
+	pa_pmu->topo.ccl_id = -1;
+	pa_pmu->topo.sccl_id = -1;
 
 	pa_pmu->dev_info = device_get_match_data(&pdev->dev);
 	if (!pa_pmu->dev_info)
@@ -488,9 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u",
-			      pa_pmu->sicl_id, pa_pmu->dev_info->name,
-			      pa_pmu->index_id);
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d",
+			      pa_pmu->topo.sicl_id, pa_pmu->dev_info->name,
+			      pa_pmu->topo.index_id);
 	if (!name)
 		return -ENOMEM;
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 70aa27903018..ef82fdf09225 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -465,18 +465,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp)
  */
 static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu)
 {
+	struct hisi_pmu_topology *topo = &hisi_pmu->topo;
 	int sccl_id, ccl_id;
 
-	if (hisi_pmu->ccl_id == -1) {
+	if (topo->ccl_id == -1) {
 		/* If CCL_ID is -1, the PMU only shares the same SCCL */
 		hisi_read_sccl_and_ccl_id(&sccl_id, NULL);
 
-		return sccl_id == hisi_pmu->sccl_id;
+		return sccl_id == topo->sccl_id;
 	}
 
 	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
 
-	return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id;
+	return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id;
 }
 
 int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 2d9a341910ac..2c28a4c490f5 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -80,12 +80,43 @@ struct hisi_pmu_hwevents {
 	const struct attribute_group **attr_groups;
 };
 
+/**
+ * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU
+ *                            is located.
+ * @sccl_id: ID of the SCCL on which the PMU is located.
+ * @sicl_id: ID of the SICL on which the PMU is located.
+ * @scl_id:  ID used by the core which is unaware of the SCCL/SICL.
+ * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located.
+ * @index_id: the ID of the PMU module if there are several PMUs at a
+ *            particular location in the topology.
+ * @sub_id: submodule ID of the PMU. For example we use this for DDRC PMU v2
+ *          since each DDRC has more than one DMC.
+ *
+ * An ID will be -1 if the PMU isn't located on that level of the topology.
+ */
+struct hisi_pmu_topology {
+	/*
+	 * SCCL (Super CPU CLuster) and SICL (Super I/O Cluster) are parallel
+	 * so a PMU cannot be located on both an SCCL and a SICL. If the
+	 * SCCL/SICL distinction is not relevant, use scl_id instead.
+	 */
+	union {
+		int sccl_id;
+		int sicl_id;
+		int scl_id;
+	};
+	int ccl_id;
+	int index_id;
+	int sub_id;
+};
+
 /* Generic pmu struct for different pmu types */
 struct hisi_pmu {
 	struct pmu pmu;
 	const struct hisi_uncore_ops *ops;
 	const struct hisi_pmu_dev_info *dev_info;
 	struct hisi_pmu_hwevents pmu_events;
+	struct hisi_pmu_topology topo;
 	/*
 	 * CPUs associated to the PMU and are preferred to use for counting.
* Could be empty if PMU has no association (e.g. PMU on SICL), in @@ -97,14 +128,7 @@ struct hisi_pmu { int irq; struct device *dev; struct hlist_node node; - int sccl_id; - int sicl_id; - int ccl_id; void __iomem *base; - /* the ID of the PMU modules */ - u32 index_id; - /* For DDRC PMU v2: each DDRC has more than one DMC */ - u32 sub_id; int num_counters; int counter_bits; /* check event code range */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index a2e09822ac41..7e7a98d57cc2 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -293,19 +293,19 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, * while SCCL_ID is from MPIDR_EL1 by CPU. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->sccl_id)) { + &sllc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->index_id)) { + &sllc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } /* SLLC PMUs only share the same SCCL */ - sllc_pmu->ccl_id = -1; + sllc_pmu->topo.ccl_id = -1; sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { @@ -433,8 +433,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", - sllc_pmu->sccl_id, sllc_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d", + sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index d90bb8c877b8..b432b79187e3 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -332,19 +332,19 @@ static int hisi_uc_pmu_init_data(struct platform_device *pdev, * They have some CCLs per SCCL and then 4 UC PMU per CCL. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->sccl_id)) { + &uc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->ccl_id)) { + &uc_pmu->topo.ccl_id)) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->sub_id)) { + &uc_pmu->topo.sub_id)) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -498,8 +498,9 @@ static int hisi_uc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", - uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d", + uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id, + uc_pmu->topo.sub_id); if (!name) return -ENOMEM; -- Gitee From c481eff129c755adf22ed37c2ff5b0f7222080e7 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:56 +0800 Subject: [PATCH 045/173] drivers/perf: hisi: Add a common function to retrieve topology from firmware [Upstream commit 32528b165ea1266ee25afe6a29be0107b3c5e76a] Currently each type of uncore PMU driver uses almost the same routine and the same firmware interface (or properties) to retrieve the topology information, then reset the unused IDs to -1. 
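For reference, the duplicated shape looks roughly like the sketch below. This
is an illustrative composite only, not a verbatim copy of any single driver;
"foo" stands in for the PMU type:

	static int hisi_foo_pmu_init_data(struct platform_device *pdev,
					  struct hisi_pmu *foo_pmu)
	{
		/* each driver open-codes the same firmware property lookup */
		if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
					     &foo_pmu->topo.sccl_id)) {
			dev_err(&pdev->dev, "Can not read foo sccl-id!\n");
			return -EINVAL;
		}

		/* ...then resets by hand the IDs that do not apply to it */
		foo_pmu->topo.ccl_id = -1;

		return 0;
	}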
Extract the common parts to the framework in hisi_uncore_pmu_init_topology(). Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 9 ++---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 8 ++--- drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c | 2 ++ .../perf/hisilicon/hisi_uncore_lpddrc_pmu.c | 2 ++ drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 11 +++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 30 +++++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 11 +++---- drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 12 ++++---- 11 files changed, 63 insertions(+), 43 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 9cffe153b554..e5108023d17a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -183,20 +183,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match); static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->topo.sicl_id)) { + hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev); + + if (cpa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->topo.index_id)) { + if (cpa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->topo.ccl_id = -1; - cpa_pmu->topo.sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index e649333232fe..629e04fb6ac8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -298,6 +298,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { struct resource *res; + hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev); /* * Use the SCCL_ID and DDRC channel ID to identify the @@ -309,13 +310,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->topo.sccl_id)) { + if (ddrc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } - /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->topo.ccl_id = -1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res); @@ -326,8 +324,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->topo.sub_id)) { + if (ddrc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 61272f99ab80..418432856234 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -296,12 +296,13 @@ static int hisi_hha_pmu_init_data(struct platform_device 
*pdev, struct resource *res; acpi_status status; + hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev); + /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->topo.sccl_id)) { + if (hha_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -310,8 +311,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * Early versions of BIOS support _UID by mistake, so we support * both "hisilicon, idx-id" as preference, if available. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->topo.index_id)) { + if (hha_pmu->topo.index_id < 0) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -321,8 +321,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, hha_pmu->topo.index_id = id; } - /* HHA PMUs only share the same SCCL */ - hha_pmu->topo.ccl_id = -1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); hha_pmu->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 490a0a764297..f13e6e78550e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -356,18 +356,18 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { struct resource *res; + hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev); + /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->topo.sccl_id)) { + if (l3c_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->topo.ccl_id)) { + if (l3c_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c index 669010383394..31b0c5028e35 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3t_pmu.c @@ -176,6 +176,8 @@ static const struct of_device_id l3t_of_match[] = { static int hisi_l3t_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3t_pmu) { + hisi_uncore_pmu_init_topology(l3t_pmu, &pdev->dev); + /* * Use the SCCL_ID and CCL_ID to identify the L3T PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. diff --git a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c index 8157d888d3b8..d16b4d22d714 100644 --- a/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_lpddrc_pmu.c @@ -192,6 +192,8 @@ static const struct of_device_id lpddrc_of_match[] = { static int hisi_lpddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *lpddrc_pmu) { + hisi_uncore_pmu_init_topology(lpddrc_pmu, &pdev->dev); + /* * Use the SCCL_ID and LPDDRC channel ID to identify the * LPDDRC PMU, while SCCL_ID is in MPIDR[aff2]. 
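Each of the per-driver hunks in this patch follows the same before/after
shape, roughly (an illustrative sketch, with "foo_pmu" standing in for the
driver-specific hisi_pmu instance):

	/* before: every driver parses the property itself */
	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
				     &foo_pmu->topo.ccl_id))
		return -EINVAL;

	/*
	 * after: the framework has already parsed it, so drivers only
	 * validate the IDs they actually require
	 */
	hisi_uncore_pmu_init_topology(foo_pmu, &pdev->dev);
	if (foo_pmu->topo.ccl_id < 0)
		return -EINVAL;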
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index ecafdad342ef..24bb5347c05a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -269,25 +269,22 @@ static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
 static int hisi_pa_pmu_init_data(struct platform_device *pdev,
 				 struct hisi_pmu *pa_pmu)
 {
+	hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev);
+
 	/*
 	 * As PA PMU is in a SICL, use the SICL_ID and the index ID
 	 * to identify the PA PMU.
 	 */
-	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
-				     &pa_pmu->topo.sicl_id)) {
+	if (pa_pmu->topo.sicl_id < 0) {
 		dev_err(&pdev->dev, "Cannot read sicl-id!\n");
 		return -EINVAL;
 	}
 
-	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
-				     &pa_pmu->topo.index_id)) {
+	if (pa_pmu->topo.index_id < 0) {
 		dev_err(&pdev->dev, "Cannot read idx-id!\n");
 		return -EINVAL;
 	}
 
-	pa_pmu->topo.ccl_id = -1;
-	pa_pmu->topo.sccl_id = -1;
-
 	pa_pmu->dev_info = device_get_match_data(&pdev->dev);
 	if (!pa_pmu->dev_info)
 		return -ENODEV;
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index ef82fdf09225..41ca075ce8c2 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -550,6 +551,35 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 }
 EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU");
 
+/*
+ * Retrieve the topology information from the firmware for the hisi_pmu device.
+ * A topology ID will be -1 if we cannot initialize it, either because the PMU
+ * is not located on that level of the topology or because the firmware needs
+ * to be fixed.
+ */ +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev) +{ + struct hisi_pmu_topology *topo = &hisi_pmu->topo; + + topo->sccl_id = -1; + topo->ccl_id = -1; + topo->index_id = -1; + topo->sub_id = -1; + + if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id)) + dev_dbg(dev, "no scl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id)) + dev_dbg(dev, "no ccl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id)) + dev_dbg(dev, "no idx-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) + dev_dbg(dev, "no sub-id present\n"); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU"); + void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 2c28a4c490f5..52c635db6fc5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -161,6 +161,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, char *page); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 7e7a98d57cc2..a51df13f7f74 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -288,25 +288,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); + /* * Use the SCCL_ID and the index ID to identify the SLLC PMU, * while SCCL_ID is from MPIDR_EL1 by CPU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->topo.sccl_id)) { + if (sllc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->topo.index_id)) { + if (sllc_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - /* SLLC PMUs only share the same SCCL */ - sllc_pmu->topo.ccl_id = -1; - sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index b432b79187e3..c0934eeec4bd 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "hisi_uncore_pmu.h" @@ -326,25 +325,24 @@ static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) static int hisi_uc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *uc_pmu) { + hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev); + /* * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to * identify the topology information of UC PMU devices in the chip. * They have some CCLs per SCCL and then 4 UC PMU per CCL. 
*/ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->topo.sccl_id)) { + if (uc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->topo.ccl_id)) { + if (uc_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->topo.sub_id)) { + if (uc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } -- Gitee From 7e9be8af9385b1cdf8d65561c37d5263a3f01559 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:57 +0800 Subject: [PATCH 046/173] drivers/perf: hisi: Provide a generic implementation of cpumask/identifier [Upstream commit 8688c01e313d542124fae82e82c8d6d5c073899f] Each type of HiSilicon Uncore PMU has the following sysfs attributes: - format: bitmask in perf_event_attr::config[012] of corresponding attribute - event: events name and corresponding event code - cpumask: range of CPUs the events can be opened on - identifier: the version of this PMU Different types of PMU have different implementations of the "format" and "event" but all share the same implementation of the "cpumask" and "identifier". Thus we can move cpumask and identifier to the hisi_uncore_pmu framework and drivers can use the generic implementation. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 29 +------ drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 33 ++------ drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 33 ++------ drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 33 ++------ drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 37 ++------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 83 ++++++++++++------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 4 + drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 29 +------ drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 29 +------ 9 files changed, 89 insertions(+), 221 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index e5108023d17a..cca85d2f746d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -228,34 +228,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = { .attrs = hisi_cpa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = { - .attrs = hisi_cpa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_cpa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_cpa_pmu_identifier_attrs[] = { - &hisi_cpa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_identifier_group = { - .attrs = hisi_cpa_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = { &hisi_cpa_pmu_format_group, &hisi_cpa_pmu_events_group, - &hisi_cpa_pmu_cpumask_attr_group, - &hisi_cpa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -389,7 +366,7 @@ static void __exit hisi_cpa_pmu_module_exit(void) } module_exit(hisi_cpa_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); 
+MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Qi Liu "); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 629e04fb6ac8..3ea21d8f6971 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -382,42 +382,19 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { .attrs = hisi_ddrc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = { - .attrs = hisi_ddrc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_ddrc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { - &hisi_ddrc_pmu_identifier_attr.attr, - NULL -}; - -static struct attribute_group hisi_ddrc_pmu_identifier_group = { - .attrs = hisi_ddrc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { &hisi_ddrc_pmu_v1_format_group, &hisi_ddrc_pmu_v1_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { &hisi_ddrc_pmu_v2_format_group, &hisi_ddrc_pmu_v2_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -584,7 +561,7 @@ static void __exit hisi_ddrc_pmu_module_exit(void) } module_exit(hisi_ddrc_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 418432856234..af71f2f3082e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -407,42 +407,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = { .attrs = hisi_hha_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_hha_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = { - .attrs = hisi_hha_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_hha_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_hha_pmu_identifier_attrs[] = { - &hisi_hha_pmu_identifier_attr.attr, - NULL -}; - -static struct attribute_group hisi_hha_pmu_identifier_group = { - .attrs = hisi_hha_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { &hisi_hha_pmu_v1_format_group, &hisi_hha_pmu_v1_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = { &hisi_hha_pmu_v2_format_group, &hisi_hha_pmu_v2_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + 
&hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -585,7 +562,7 @@ static void __exit hisi_hha_pmu_module_exit(void) } module_exit(hisi_hha_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index f13e6e78550e..5fc9045c31f1 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -443,42 +443,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = { - .attrs = hisi_l3c_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_l3c_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { - &hisi_l3c_pmu_identifier_attr.attr, - NULL -}; - -static struct attribute_group hisi_l3c_pmu_identifier_group = { - .attrs = hisi_l3c_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { &hisi_l3c_pmu_v2_format_group, &hisi_l3c_pmu_v2_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -625,7 +602,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 24bb5347c05a..c868a3169990 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -353,29 +353,6 @@ static const struct attribute_group hisi_h60pa_pmu_events_group = { .attrs = hisi_h60pa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { - .attrs = hisi_pa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_pa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_pa_pmu_identifier_attrs[] = { - &hisi_pa_pmu_identifier_attr.attr, - NULL -}; - -static struct attribute_group hisi_pa_pmu_identifier_group = { - .attrs = hisi_pa_pmu_identifier_attrs, -}; - static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { .mask_offset = PA_INT_MASK, .clear_offset = PA_INT_CLEAR, @@ -385,8 +362,8 @@ static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, - 
&hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -399,8 +376,8 @@ static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v3_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -419,8 +396,8 @@ static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_h60pa_pmu_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -570,7 +547,7 @@ static void __exit hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 41ca075ce8c2..42021823f482 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -35,7 +35,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev, return sprintf(buf, "%s\n", (char *)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_format_sysfs_show, HISI_PMU); /* * PMU event attributes @@ -49,7 +49,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, HISI_PMU); /* * sysfs cpumask attributes. 
For uncore PMU, we only have a single CPU to show @@ -61,7 +61,42 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sprintf(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +const struct attribute_group hisi_pmu_cpumask_attr_group = { + .attrs = hisi_pmu_cpumask_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, HISI_PMU); + +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, HISI_PMU); + +static struct device_attribute hisi_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pmu_identifier_attrs[] = { + &hisi_pmu_identifier_attr.attr, + NULL +}; + +const struct attribute_group hisi_pmu_identifier_group = { + .attrs = hisi_pmu_identifier_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, HISI_PMU); static bool hisi_validate_event_group(struct perf_event *event) { @@ -111,17 +146,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, "HISI_PMU"); - -ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); - - return snprintf(page, PAGE_SIZE, "0x%08x\n", hisi_pmu->identifier); -} -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, HISI_PMU); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -180,7 +205,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, HISI_PMU); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -234,7 +259,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, HISI_PMU); /* * Set the counter to count the event that we're interested in, @@ -288,7 +313,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, HISI_PMU); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -309,7 +334,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, HISI_PMU); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -332,7 +357,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, "HISI_PMU"); 
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, HISI_PMU); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -349,7 +374,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, HISI_PMU); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -372,7 +397,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, HISI_PMU); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -384,14 +409,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, HISI_PMU); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, HISI_PMU); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -404,7 +429,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, HISI_PMU); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -412,7 +437,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, HISI_PMU); /* @@ -515,7 +540,7 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, HISI_PMU); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -549,7 +574,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, HISI_PMU); /* * Retrieve the topology information from the firmware for the hisi_pmu device. 
@@ -578,7 +603,7 @@ void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) dev_dbg(dev, "no sub-id present\n"); } -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, HISI_PMU); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { @@ -597,6 +622,6 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, "HISI_PMU"); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, HISI_PMU); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 52c635db6fc5..4019869abdc4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -136,6 +136,10 @@ struct hisi_pmu { u32 identifier; }; +/* Generic implementation of cpumask/identifier group */ +extern const struct attribute_group hisi_pmu_cpumask_attr_group; +extern const struct attribute_group hisi_pmu_identifier_group; + int hisi_uncore_pmu_get_event_idx(struct perf_event *event); void hisi_uncore_pmu_read(struct perf_event *event); int hisi_uncore_pmu_add(struct perf_event *event, int flags); diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index a51df13f7f74..44f4ff42cb2c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -344,34 +344,11 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = { .attrs = hisi_sllc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { - .attrs = hisi_sllc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_sllc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_sllc_pmu_identifier_attrs[] = { - &hisi_sllc_pmu_identifier_attr.attr, - NULL -}; - -static struct attribute_group hisi_sllc_pmu_identifier_group = { - .attrs = hisi_sllc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { &hisi_sllc_pmu_v2_format_group, &hisi_sllc_pmu_v2_events_group, - &hisi_sllc_pmu_cpumask_attr_group, - &hisi_sllc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -508,7 +485,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); -MODULE_IMPORT_NS("HISI_PMU"); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index c0934eeec4bd..81046f3de38d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -397,34 +397,11 @@ static const struct attribute_group hisi_uc_pmu_events_group = { .attrs = hisi_uc_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct 
attribute_group hisi_uc_pmu_cpumask_attr_group = {
-	.attrs = hisi_uc_pmu_cpumask_attrs,
-};
-
-static struct device_attribute hisi_uc_pmu_identifier_attr =
-	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
-
-static struct attribute *hisi_uc_pmu_identifier_attrs[] = {
-	&hisi_uc_pmu_identifier_attr.attr,
-	NULL
-};
-
-static const struct attribute_group hisi_uc_pmu_identifier_group = {
-	.attrs = hisi_uc_pmu_identifier_attrs,
-};
-
 static const struct attribute_group *hisi_uc_pmu_attr_groups[] = {
 	&hisi_uc_pmu_format_group,
 	&hisi_uc_pmu_events_group,
-	&hisi_uc_pmu_cpumask_attr_group,
-	&hisi_uc_pmu_identifier_group,
+	&hisi_pmu_cpumask_attr_group,
+	&hisi_pmu_identifier_group,
 	NULL
 };
 
@@ -572,7 +549,7 @@ static void __exit hisi_uc_pmu_module_exit(void)
 }
 module_exit(hisi_uc_pmu_module_exit);
 
-MODULE_IMPORT_NS("HISI_PMU");
+MODULE_IMPORT_NS(HISI_PMU);
 MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Junhao He ");

-- 
Gitee

From 60217ee56ef9304e063ec84545870d0d650105af Mon Sep 17 00:00:00 2001
From: Yicong Yang 
Date: Fri, 11 Apr 2025 19:13:58 +0800
Subject: [PATCH 047/173] drivers/perf: hisi: Export associated CPUs of each
 PMU through sysfs

[Upstream commit 3b051bb7cb4344d12b9b9b4974c77706462d4246]

Although the event of an uncore PMU can only be opened on a single CPU,
some PMUs do have an affinity to a range of CPUs. For example, the L3C
PMU is associated with the CPUs sharing the L3T it monitors. Users may
infer this affinity from the PMU name, which may have the SCCL ID and
CCL ID encoded (for the L3C and others), but that is not very
straightforward. So export this information by adding an
"associated_cpus" sysfs attribute so that users can read it directly.

Reviewed-by: Jonathan Cameron 
Signed-off-by: Yicong Yang 
Signed-off-by: huwentao 
---
 Documentation/admin-guide/perf/hisi-pmu.rst |  5 ++++-
 drivers/perf/hisilicon/hisi_uncore_pmu.c    | 10 ++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index 404a5c3d9d00..3532d659f62a 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -36,7 +36,10 @@ e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in
 SCCL ID #1.
 
 The driver also provides a "cpumask" sysfs attribute, which shows the CPU core
-ID used to count the uncore PMU event.
+ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is
+also provided to show the CPUs associated with this PMU. The "cpumask" indicates
+the CPUs on which to open the events, usually as a hint for userspace tools like
+perf. It only contains one associated CPU from the "associated_cpus".
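To make the distinction concrete: for a hypothetical PMU associated with CPUs
0-3, the two attributes would read as follows (illustrative values only):

	associated_cpus: 0-3
	cpumask:         0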
Example usage of perf:: diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 42021823f482..8a70d180c27c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -65,8 +65,18 @@ EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); +static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus); +} +static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL); + static struct attribute *hisi_pmu_cpumask_attrs[] = { &dev_attr_cpumask.attr, + &dev_attr_associated_cpus.attr, NULL }; -- Gitee From 672ffc02d366ebbd49065e3a0269fa8f5199c018 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:13:59 +0800 Subject: [PATCH 048/173] drivers/perf: hisi: Fix incorrect variable name "hha_pmu" in DDRC PMU driver [Upstream commit 4e15bcffa19acf15b6acb2cb3f4a1dd923ee4708] In the callback function write_evtype(), the variable name of struct hisi_pmu should be "ddrc_pmu" instead of "hha_pmu". Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 3ea21d8f6971..25c8554eb7b9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -111,14 +111,14 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, * so there is no need to write event type, while it is programmable counter in * PMU v2. */ -static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, +static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { u32 offset; - if (hha_pmu->identifier >= HISI_PMU_V2) { + if (ddrc_pmu->identifier >= HISI_PMU_V2) { offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, hha_pmu->base + offset); + writel(type, ddrc_pmu->base + offset); } } -- Gitee From fedd122f2ac75a5e626275db906a221e2246673c Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:00 +0800 Subject: [PATCH 049/173] drivers/perf: hisi: Delete redundant blank line of DDRC PMU [Upstream commit f03241fbebdf47b9b435752f7e72d3f1e96e4529] Do not add blank line at the end of a code block defined by braces. 
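Concretely, the cleanup turns this shape (an illustrative example of the style
rule, with placeholder names):

	static void __exit foo_pmu_module_exit(void)
	{
		platform_driver_unregister(&foo_pmu_driver);

	}

into:

	static void __exit foo_pmu_module_exit(void)
	{
		platform_driver_unregister(&foo_pmu_driver);
	}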
Signed-off-by: Junhao He 
Reviewed-by: Jonathan Cameron 
Signed-off-by: Yicong Yang 
Signed-off-by: huwentao 
---
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 25c8554eb7b9..7867bd870968 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -557,7 +557,6 @@ static void __exit hisi_ddrc_pmu_module_exit(void)
 {
 	platform_driver_unregister(&hisi_ddrc_pmu_driver);
 	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE);
-
 }
 module_exit(hisi_ddrc_pmu_module_exit);

-- 
Gitee

From ef673a1e6649cb0721bb4d5def3f9a27e2a25160 Mon Sep 17 00:00:00 2001
From: Junhao He 
Date: Thu, 5 Dec 2024 11:07:16 +0800
Subject: [PATCH 050/173] drivers/perf: hisi: Add support for HiSilicon DDRC
 v3 PMU driver

[Upstream commit 17aa34e86936d0dba4e7c05c55ffc3e12c0ccec9]

On the HiSilicon HIP10C platform, the DDRC PMU is a v3 PMU. Only the
offsets of its interrupt registers differ from the DDRC v2 PMU; the
control registers and event codes are the same as on v2. That
unexpected change breaks interrupt handling: the existing driver probes
a v3 DDRC PMU as v2, so the interrupt handler cannot handle counter
overflows correctly (HiSilicon Erratum 162400501). Fix this by adding
the new ID HISI0235 and using it to select the correct DDRC v3 PMU
interrupt register offsets (the mask/status/clear registers).

Signed-off-by: Junhao He 
Signed-off-by: huwentao 
---
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 146 ++++++++++--------
 1 file changed, 85 insertions(+), 61 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 7867bd870968..48df744eed9c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -43,6 +43,11 @@
 #define DDRC_V2_EVENT_TYPE	0xe74
 #define DDRC_V2_PERF_CTRL	0xeA0
 
+/* DDRC interrupt registers definition in v3 */
+#define DDRC_V3_INT_MASK	0x534
+#define DDRC_V3_INT_STATUS	0x538
+#define DDRC_V3_INT_CLEAR	0x53C
+
 /* DDRC has 8-counters */
 #define DDRC_NR_COUNTERS	0x8
 #define DDRC_V1_PERF_CTRL_EN	0x2
@@ -63,6 +68,12 @@ static const u32 ddrc_reg_off[] = {
 	DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG
 };
 
+struct hisi_ddrc_pmu_regs {
+	u32 int_mask;
+	u32 int_clear;
+	u32 int_status;
+};
+
 /*
 * Select the counter register offset using the counter index.
* In PMU v1, there are no programmable counter, the count @@ -223,77 +234,43 @@ static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); } -static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 0 to enable interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_INT_MASK); -} - -static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 1 to mask interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_INT_MASK); -} - -static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + writel(val, ddrc_pmu->base + regs->int_mask); } -static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + writel(val, ddrc_pmu->base + regs->int_mask); } -static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) +static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) { - return readl(ddrc_pmu->base + DDRC_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); + return readl(ddrc_pmu->base + regs->int_status); } -static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) { - return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR); + writel(1 << idx, ddrc_pmu->base + regs->int_clear); } -static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { - { "HISI0233", }, - { "HISI0234", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); - static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { @@ -315,6 +292,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + ddrc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!ddrc_pmu->dev_info) + return -ENODEV; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ddrc_pmu->base)) { @@ -405,12 +386,12 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .stop_counters = hisi_ddrc_pmu_v1_stop_counters, .enable_counter = hisi_ddrc_pmu_v1_enable_counter, .disable_counter = 
hisi_ddrc_pmu_v1_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int, + .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, .write_counter = hisi_ddrc_pmu_v1_write_counter, .read_counter = hisi_ddrc_pmu_v1_read_counter, - .get_int_status = hisi_ddrc_pmu_v1_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { @@ -420,12 +401,12 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { .stop_counters = hisi_ddrc_pmu_v2_stop_counters, .enable_counter = hisi_ddrc_pmu_v2_enable_counter, .disable_counter = hisi_ddrc_pmu_v2_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int, + .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, .write_counter = hisi_ddrc_pmu_v2_write_counter, .read_counter = hisi_ddrc_pmu_v2_read_counter, - .get_int_status = hisi_ddrc_pmu_v2_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -444,15 +425,14 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ddrc_pmu->identifier >= HISI_PMU_V2) { ddrc_pmu->counter_bits = 48; ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups; ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; } else { ddrc_pmu->counter_bits = 32; ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups; ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; } + ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; ddrc_pmu->num_counters = DDRC_NR_COUNTERS; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; @@ -522,6 +502,50 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) return 0; } +static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = { + .int_mask = DDRC_INT_MASK, + .int_clear = DDRC_INT_CLEAR, + .int_status = DDRC_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v1 = { + .name = "ddrc", + .attr_groups = hisi_ddrc_pmu_v1_attr_groups, + .private = &hisi_ddrc_v1_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = { + .int_mask = DDRC_V2_INT_MASK, + .int_clear = DDRC_V2_INT_CLEAR, + .int_status = DDRC_V2_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { + .name = "ddrc", + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v2_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { + .int_mask = DDRC_V3_INT_MASK, + .int_clear = DDRC_V3_INT_CLEAR, + .int_status = DDRC_V3_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { + .name = "ddrc", + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v3_pmu_regs, +}; + +static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { + { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1}, + { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2}, + { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3}, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); + static struct 
platform_driver hisi_ddrc_pmu_driver = { .driver = { .name = "hisi_ddrc_pmu", -- Gitee From 5f54888191e7fae9cc612eb89d4f83a47e4c9406 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:01 +0800 Subject: [PATCH 051/173] drivers/perf: hisi: Simplify the probe process for each DDRC version [Upstream commit dc86791ff68c38a2954c3bf2c444b6d6d9da52f3] Version 1 and 2 of DDRC PMU also use different HID. Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. In order to support this extend struct hisi_pmu_dev_info for version specific counter bits and event range. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 232 +++++++----------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 + 2 files changed, 84 insertions(+), 150 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 48df744eed9c..00f635a9cb73 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -43,11 +43,6 @@ #define DDRC_V2_EVENT_TYPE 0xe74 #define DDRC_V2_PERF_CTRL 0xeA0 -/* DDRC interrupt registers definition in v3 */ -#define DDRC_V3_INT_MASK 0x534 -#define DDRC_V3_INT_STATUS 0x538 -#define DDRC_V3_INT_CLEAR 0x53C - /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 @@ -55,6 +50,10 @@ #define DDRC_V1_NR_EVENTS 0x7 #define DDRC_V2_NR_EVENTS 0x90 +#define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8) +#define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4) +#define DDRC_UNIMPLEMENTED_REG GENMASK(31, 0) + /* * For PMU v1, there are eight-events and every event has been mapped * to fixed-purpose counters which register offset is not consistent. @@ -69,52 +68,36 @@ static const u32 ddrc_reg_off[] = { }; struct hisi_ddrc_pmu_regs { + u32 event_cnt; + u32 event_ctrl; + u32 event_type; + u32 perf_ctrl; + u32 perf_ctrl_en; u32 int_mask; u32 int_clear; u32 int_status; }; -/* - * Select the counter register offset using the counter index. - * In PMU v1, there are no programmable counter, the count - * is read form the statistics counter register itself. 
- */ -static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) +static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { - return ddrc_reg_off[cntr_idx]; -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx) -{ - return DDRC_V2_EVENT_CNT + cntr_idx * 8; -} + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + return readl(ddrc_pmu->base + ddrc_reg_off[hwc->idx]); -static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - return readl(ddrc_pmu->base + - hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); + return readq(ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } -static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc, u64 val) { - writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); -} - -static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - return readq(ddrc_pmu->base + - hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc, u64 val) -{ - writeq(val, - ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + writel((u32)val, ddrc_pmu->base + ddrc_reg_off[hwc->idx]); + else + writeq(val, ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } /* @@ -125,54 +108,12 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { - u32 offset; - - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, ddrc_pmu->base + offset); - } -} - -static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val |= DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val &= ~DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; - /* Set counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val |= (1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); -} + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return; -static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Clear counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val &= ~(1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); + writel(type, ddrc_pmu->base + DDRC_EVENT_TYPEn(regs->event_type, idx)); } static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) @@ -191,47 +132,58 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event 
*event) return idx; } -static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event) +static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) { + struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu); + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; + + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return hisi_ddrc_pmu_v1_get_event_idx(event); + return hisi_uncore_pmu_get_event_idx(event); } -static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val |= DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val |= regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val &= ~DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val &= ~regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val = readl(ddrc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } -static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + + val = readl(ddrc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, @@ -241,7 +193,7 @@ static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, u32 val; val = readl(ddrc_pmu->base + regs->int_mask); - val &= ~(1 << hwc->idx); + val &= ~BIT_ULL(hwc->idx); writel(val, ddrc_pmu->base + regs->int_mask); } @@ -251,8 +203,9 @@ static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; + val = readl(ddrc_pmu->base + regs->int_mask); - val |= 1 << hwc->idx; + val |= BIT_ULL(hwc->idx); writel(val, ddrc_pmu->base + regs->int_mask); } @@ -379,32 +332,17 @@ static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { NULL }; -static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { - .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, - .start_counters = hisi_ddrc_pmu_v1_start_counters, - .stop_counters = hisi_ddrc_pmu_v1_stop_counters, - .enable_counter = hisi_ddrc_pmu_v1_enable_counter, - .disable_counter = hisi_ddrc_pmu_v1_disable_counter, - .enable_counter_int = 
hisi_ddrc_pmu_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v1_write_counter, - .read_counter = hisi_ddrc_pmu_v1_read_counter, - .get_int_status = hisi_ddrc_pmu_get_int_status, - .clear_int_status = hisi_ddrc_pmu_clear_int_status, -}; - -static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { +static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx, - .start_counters = hisi_ddrc_pmu_v2_start_counters, - .stop_counters = hisi_ddrc_pmu_v2_stop_counters, - .enable_counter = hisi_ddrc_pmu_v2_enable_counter, - .disable_counter = hisi_ddrc_pmu_v2_disable_counter, + .get_event_idx = hisi_ddrc_pmu_get_event_idx, + .start_counters = hisi_ddrc_pmu_start_counters, + .stop_counters = hisi_ddrc_pmu_stop_counters, + .enable_counter = hisi_ddrc_pmu_enable_counter, + .disable_counter = hisi_ddrc_pmu_disable_counter, .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v2_write_counter, - .read_counter = hisi_ddrc_pmu_v2_read_counter, + .write_counter = hisi_ddrc_pmu_write_counter, + .read_counter = hisi_ddrc_pmu_read_counter, .get_int_status = hisi_ddrc_pmu_get_int_status, .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; @@ -422,15 +360,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - ddrc_pmu->counter_bits = 48; - ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; - ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; - } else { - ddrc_pmu->counter_bits = 32; - ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; - ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; - } + ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; + ddrc_pmu->counter_bits = ddrc_pmu->dev_info->counter_bits; + ddrc_pmu->check_event = ddrc_pmu->dev_info->check_event; + ddrc_pmu->ops = &hisi_uncore_ddrc_ops; ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; ddrc_pmu->num_counters = DDRC_NR_COUNTERS; @@ -503,45 +436,44 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) } static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = { + .event_cnt = DDRC_UNIMPLEMENTED_REG, + .event_ctrl = DDRC_EVENT_CTRL, + .event_type = DDRC_UNIMPLEMENTED_REG, + .perf_ctrl = DDRC_PERF_CTRL, + .perf_ctrl_en = DDRC_V1_PERF_CTRL_EN, .int_mask = DDRC_INT_MASK, .int_clear = DDRC_INT_CLEAR, .int_status = DDRC_INT_STATUS, }; static const struct hisi_pmu_dev_info hisi_ddrc_v1 = { - .name = "ddrc", + .counter_bits = 32, + .check_event = DDRC_V1_NR_EVENTS, .attr_groups = hisi_ddrc_pmu_v1_attr_groups, .private = &hisi_ddrc_v1_pmu_regs, }; static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, .int_mask = DDRC_V2_INT_MASK, .int_clear = DDRC_V2_INT_CLEAR, .int_status = DDRC_V2_INT_STATUS, }; static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { - .name = "ddrc", + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, .attr_groups = hisi_ddrc_pmu_v2_attr_groups, .private = &hisi_ddrc_v2_pmu_regs, }; -static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { - .int_mask = DDRC_V3_INT_MASK, - .int_clear = DDRC_V3_INT_CLEAR, - .int_status = DDRC_V3_INT_STATUS, -}; - -static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { 
- .name = "ddrc", - .attr_groups = hisi_ddrc_pmu_v2_attr_groups, - .private = &hisi_ddrc_v3_pmu_regs, -}; - static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1}, { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2}, - { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3}, {} }; MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 4019869abdc4..8fc983c83008 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -71,6 +71,8 @@ struct hisi_uncore_ops { struct hisi_pmu_dev_info { const char *name; const struct attribute_group **attr_groups; + u32 counter_bits; + u32 check_event; void *private; }; -- Gitee From ba801a35412974c51e741b506912c304796cd029 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:03 +0800 Subject: [PATCH 052/173] drivers/perf: hisi: Use ACPI driver_data to retrieve SLLC PMU information [Upstream commit 29614c55fe6ff8e5ddee9c6247d5c3dbe05ca8bc] Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. Factor out SLLC register definition to struct hisi_sllc_pmu_regs. No functional changes intended. Signed-off-by: Junhao He Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 178 ++++++++++++------ 1 file changed, 118 insertions(+), 60 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 44f4ff42cb2c..9d5ba7af5cf8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -41,6 +41,7 @@ #define SLLC_SRCID_CMD_SHIFT 1 #define SLLC_SRCID_MSK_SHIFT 12 #define SLLC_NR_EVENTS 0x80 +#define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11); @@ -48,6 +49,23 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_sllc_pmu_regs { + u32 int_mask; + u32 int_clear; + u32 int_status; + u32 perf_ctrl; + u32 srcid_ctrl; + u32 srcid_cmd_shift; + u32 srcid_mask_shift; + u32 tgtid_ctrl; + u32 tgtid_min_shift; + u32 tgtid_max_shift; + u32 event_ctrl; + u32 event_type0; + u32 version; + u32 event_cnt0; +}; + static bool tgtid_is_valid(u32 max, u32 min) { return max > 0 && max >= min; @@ -56,96 +74,104 @@ static bool tgtid_is_valid(u32 max, u32 min) static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TRACETAG_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val 
= readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { - u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT); + u32 val = (max << regs->tgtid_max_shift) | + (min << regs->tgtid_min_shift); - writel(val, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(val, sllc_pmu->base + regs->tgtid_ctrl); /* Enable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TGTID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { u32 val; - writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(SLLC_TGTID_NONE, sllc_pmu->base + regs->tgtid_ctrl); /* Disable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val, msk; msk = hisi_get_srcid_msk(event); - val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT); - writel(val, sllc_pmu->base + SLLC_SRCID_CTRL); + val = (cmd << regs->srcid_cmd_shift) | + (msk << regs->srcid_mask_shift); + writel(val, sllc_pmu->base + regs->srcid_ctrl); /* Enable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_SRCID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val; - writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL); + writel(SLLC_SRCID_NONE, sllc_pmu->base + regs->srcid_ctrl); /* Disable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } @@ -167,29 +193,27 @@ static void hisi_sllc_pmu_clear_filter(struct perf_event *event) } } -static u32 hisi_sllc_pmu_get_counter_offset(int idx) -{ - return (SLLC_EVENT_CNT0_L + idx * 8); -} - static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { - return readq(sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readq(sllc_pmu->base + 
SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + writeq(val, sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, u32 type) { - u32 reg, reg_idx, shift, val; + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + u32 reg, val; /* * Select the appropriate event select register(SLLC_EVENT_TYPE0/1). @@ -198,96 +222,98 @@ static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, * SLLC_EVENT_TYPE1 is chosen. */ - reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4; - reg_idx = idx % 4; - shift = 8 * reg_idx; + reg = regs->event_type0 + (idx / 4) * 4; /* Write event code to SLLC_EVENT_TYPEx Register */ val = readl(sllc_pmu->base + reg); - val &= ~(SLLC_EVTYPE_MASK << shift); - val |= (type << shift); + val &= ~(SLLC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); writel(val, sllc_pmu->base + reg); } static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_PERF_CTRL_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_PERF_CTRL_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 1 to 
mask interrupt */ - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu) { - return readl(sllc_pmu->base + SLLC_INT_STATUS); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readl(sllc_pmu->base + regs->int_status); } static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx) { - writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR); -} + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; -static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { - { "HISI0263", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + writel(BIT_ULL(idx), sllc_pmu->base + regs->int_clear); +} static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs; + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); /* @@ -304,13 +330,18 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + sllc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!sllc_pmu->dev_info) + return -ENODEV; + sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); return PTR_ERR(sllc_pmu->base); } - sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION); + regs = sllc_pmu->dev_info->private; + sllc_pmu->identifier = readl(sllc_pmu->base + regs->version); return 0; } @@ -352,6 +383,27 @@ static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { NULL }; +static struct hisi_sllc_pmu_regs hisi_sllc_v2_pmu_regs = { + .int_mask = SLLC_INT_MASK, + .int_clear = SLLC_INT_CLEAR, + .int_status = SLLC_INT_STATUS, + .perf_ctrl = SLLC_PERF_CTRL, + .srcid_ctrl = SLLC_SRCID_CTRL, + .srcid_cmd_shift = SLLC_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_TGTID_CTRL, + .tgtid_min_shift = SLLC_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_EVENT_CTRL, + .event_type0 = SLLC_EVENT_TYPE0, + .version = SLLC_VERSION, + .event_cnt0 = SLLC_EVENT_CNT0_L, +}; + +static const struct hisi_pmu_dev_info hisi_sllc_v2 = { + .private = &hisi_sllc_v2_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { .write_evtype = hisi_sllc_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -447,6 +499,12 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { + { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + static struct platform_driver hisi_sllc_pmu_driver = { .driver = { .name = "hisi_sllc_pmu", -- Gitee From 024f5f8f892ff889df0aa934c798f7bdc1c2842f Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:04 +0800 Subject: [PATCH 053/173] drivers/perf: hisi: Add support for HiSilicon SLLC v3 PMU driver [Upstream commit 1fd20ba0a1dcaf3bf8757c0e0b8ff754ab25b228] SLLC v3 PMU has the following changes compared to previous version: a) update the register layout b) update the definition of SRCID_CTRL and TGTID_CTRL registers. To be compatible with v2, we use maximum width (11 bits) and mask the extra length for themselves. c) remove latency events (driver does not need to be adapted). 
SLLC v3 PMU is identified with HID HISI0264.

Signed-off-by: Junhao He
Reviewed-by: Jonathan Cameron
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 9d5ba7af5cf8..46822ddca2a8 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -28,6 +28,18 @@
 #define SLLC_VERSION		0x1cf0
 #define SLLC_EVENT_CNT0_L	0x1d00
 
+/* SLLC registers definition in v3 */
+#define SLLC_V3_INT_MASK	0x6834
+#define SLLC_V3_INT_STATUS	0x6838
+#define SLLC_V3_INT_CLEAR	0x683c
+#define SLLC_V3_VERSION		0x6c00
+#define SLLC_V3_PERF_CTRL	0x6d00
+#define SLLC_V3_SRCID_CTRL	0x6d04
+#define SLLC_V3_TGTID_CTRL	0x6d08
+#define SLLC_V3_EVENT_CTRL	0x6d14
+#define SLLC_V3_EVENT_TYPE0	0x6d18
+#define SLLC_V3_EVENT_CNT0_L	0x6e00
+
 #define SLLC_EVTYPE_MASK	0xff
 #define SLLC_PERF_CTRL_EN	BIT(0)
 #define SLLC_FILT_EN		BIT(1)
@@ -40,6 +52,12 @@
 #define SLLC_TGTID_MAX_SHIFT	12
 #define SLLC_SRCID_CMD_SHIFT	1
 #define SLLC_SRCID_MSK_SHIFT	12
+
+#define SLLC_V3_TGTID_MIN_SHIFT	1
+#define SLLC_V3_TGTID_MAX_SHIFT	10
+#define SLLC_V3_SRCID_CMD_SHIFT	1
+#define SLLC_V3_SRCID_MSK_SHIFT	10
+
 #define SLLC_NR_EVENTS		0x80
 #define SLLC_EVENT_CNTn(cnt0, n)	((cnt0) + (n) * 8)
 
@@ -404,6 +422,27 @@ static const struct hisi_pmu_dev_info hisi_sllc_v2 = {
 	.private = &hisi_sllc_v2_pmu_regs,
 };
 
+static struct hisi_sllc_pmu_regs hisi_sllc_v3_pmu_regs = {
+	.int_mask = SLLC_V3_INT_MASK,
+	.int_clear = SLLC_V3_INT_CLEAR,
+	.int_status = SLLC_V3_INT_STATUS,
+	.perf_ctrl = SLLC_V3_PERF_CTRL,
+	.srcid_ctrl = SLLC_V3_SRCID_CTRL,
+	.srcid_cmd_shift = SLLC_V3_SRCID_CMD_SHIFT,
+	.srcid_mask_shift = SLLC_V3_SRCID_MSK_SHIFT,
+	.tgtid_ctrl = SLLC_V3_TGTID_CTRL,
+	.tgtid_min_shift = SLLC_V3_TGTID_MIN_SHIFT,
+	.tgtid_max_shift = SLLC_V3_TGTID_MAX_SHIFT,
+	.event_ctrl = SLLC_V3_EVENT_CTRL,
+	.event_type0 = SLLC_V3_EVENT_TYPE0,
+	.version = SLLC_V3_VERSION,
+	.event_cnt0 = SLLC_V3_EVENT_CNT0_L,
+};
+
+static const struct hisi_pmu_dev_info hisi_sllc_v3 = {
+	.private = &hisi_sllc_v3_pmu_regs,
+};
+
 static const struct hisi_uncore_ops hisi_uncore_sllc_ops = {
 	.write_evtype = hisi_sllc_pmu_write_evtype,
 	.get_event_idx = hisi_uncore_pmu_get_event_idx,
@@ -501,6 +540,7 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev)
 
 static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
 	{ "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 },
+	{ "HISI0264", (kernel_ulong_t)&hisi_sllc_v3 },
 	{}
 };
 MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
-- 
Gitee

From 579ab0d7f5598542e3af9891fa5061da63733d3e Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Fri, 11 Apr 2025 19:14:05 +0800
Subject: [PATCH 054/173] drivers/perf: hisi: Relax the event number check of
 v2 PMUs

[Upstream commit 35f5b36e8cc2d241083ee0f08fa8b5366bde6f22]

The supported event number range of each Uncore PMU is provided by
each driver in hisi_pmu::check_event and out-of-range events will be
rejected. A later version with an expanded event number range needs to
register the PMU with an updated hisi_pmu::check_event even if that's
the only update, which means the expanded events cannot be used unless
the driver is updated. However, the unsupported events won't be counted
by the hardware, so we can relax the event number check to allow the
use of the expanded events.
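For reference, the gate being relaxed is the event-number check in the
common uncore PMU code. A simplified sketch (not the verbatim upstream
helper) is the following; raising check_event to the full width of the
event field (e.g. 0xFF) admits every encodable event, and events the
hardware doesn't implement simply count zero:

	/* Sketch of the event gate in hisi_uncore_pmu_event_init() */
	if (event->attr.config > hisi_pmu->check_event)
		return -EINVAL;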
Signed-off-by: Junhao He
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c  | 6 +++---
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c   | 2 +-
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 00f635a9cb73..f4b53394b732 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -48,7 +48,7 @@
 #define DDRC_V1_PERF_CTRL_EN	0x2
 #define DDRC_V2_PERF_CTRL_EN	0x1
 #define DDRC_V1_NR_EVENTS	0x7
-#define DDRC_V2_NR_EVENTS	0x90
+#define DDRC_V2_NR_EVENTS	0xFF
 
 #define DDRC_EVENT_CNTn(base, n)	((base) + (n) * 8)
 #define DDRC_EVENT_TYPEn(base, n)	((base) + (n) * 4)
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index af71f2f3082e..5b065cea578a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -47,9 +47,9 @@
 #define HHA_SRCID_CMD		GENMASK(16, 6)
 #define HHA_SRCID_MSK		GENMASK(30, 20)
 #define HHA_DATSRC_SKT_EN	BIT(23)
-#define HHA_EVTYPE_NONE		0xff
+#define HHA_EVTYPE_MASK		GENMASK(7, 0)
 #define HHA_V1_NR_EVENT		0x65
-#define HHA_V2_NR_EVENT		0xCE
+#define HHA_V2_NR_EVENT		0xFF
 
 HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11);
@@ -197,7 +197,7 @@ static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
 
 	/* Write event code to HHA_EVENT_TYPEx register */
 	val = readl(hha_pmu->base + reg);
-	val &= ~(HHA_EVTYPE_NONE << shift);
+	val &= ~(HHA_EVTYPE_MASK << shift);
 	val |= (type << shift);
 	writel(val, hha_pmu->base + reg);
 }
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index c868a3169990..b611fbae55cb 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -440,7 +440,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev,
 	pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups;
 	pa_pmu->num_counters = PA_NR_COUNTERS;
 	pa_pmu->ops = &hisi_uncore_pa_ops;
-	pa_pmu->check_event = 0xB0;
+	pa_pmu->check_event = PA_EVTYPE_MASK;
 	pa_pmu->counter_bits = 64;
 	pa_pmu->dev = &pdev->dev;
 	pa_pmu->on_cpu = -1;
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 46822ddca2a8..a2a85edb59b4 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -58,7 +58,7 @@
 #define SLLC_V3_SRCID_CMD_SHIFT	1
 #define SLLC_V3_SRCID_MSK_SHIFT	10
 
-#define SLLC_NR_EVENTS		0x80
+#define SLLC_NR_EVENTS		0xff
 #define SLLC_EVENT_CNTn(cnt0, n)	((cnt0) + (n) * 8)
 
-- 
Gitee

From 3eb91e34f7bcf1aeca8b3ed4ded17a76c85a8b80 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 11 Apr 2025 19:14:06 +0800
Subject: [PATCH 055/173] drivers/perf: hisi: Support PMUs with no interrupt

[Upstream commit e480898e767c54a883e965fc306e2ece013cbca5]

We'll have PMUs that don't have an interrupt to indicate counter
overflow, but the Uncore PMU core assumes all PMUs have an interrupt.
So handle this case in the core. The existing PMUs won't be affected.
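A driver for such a PMU can then simply leave hisi_pmu::irq at its
zero-initialized value, so the core skips the irq_set_affinity() calls
above, and provide stub interrupt callbacks. A minimal sketch (the
foo_pmu names are hypothetical; the NoC PMU added later in this series
follows this pattern):

	/* This PMU has no overflow interrupt, so the int callbacks are stubs */
	static void foo_pmu_enable_counter_int(struct hisi_pmu *foo_pmu,
					       struct hw_perf_event *hwc)
	{
	}

	static void foo_pmu_disable_counter_int(struct hisi_pmu *foo_pmu,
						struct hw_perf_event *hwc)
	{
	}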
Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 8a70d180c27c..8a5263d2103e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -531,7 +531,9 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, + cpumask_of(hisi_pmu->on_cpu))); return 0; } @@ -546,7 +548,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) hisi_pmu->on_cpu = cpu; /* Overflow interrupt also should use the same CPU */ - WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); return 0; } @@ -580,7 +583,9 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); /* Use this CPU for event counting */ hisi_pmu->on_cpu = target; - WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(target))); + + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); return 0; } -- Gitee From 266937f52cb6296a3652f2aedc95ce77d505d8b6 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:14:07 +0800 Subject: [PATCH 056/173] drivers/perf: hisi: Add support for HiSilicon NoC PMU Adds the support for HiSilicon NoC (Network on Chip) PMU which will be used to monitor the events on the system bus. The PMU device will be named after the SCL ID (either Super CPU cluster or Super IO cluster) and the index ID, just similar to other HiSilicon Uncore PMUs. Below PMU formats are provided besides the event: - ch: the transaction channel (data, request, response, etc) which can be used to filter the counting. - tt_en: tracetag filtering enable. Just as other HiSilicon Uncore PMUs the NoC PMU supports only counting the transactions with tracetag. The NoC PMU doesn't have an interrupt to indicate the overflow. However we have a 64 bit counter which is large enough and it's nearly impossible to overflow. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- Documentation/admin-guide/perf/hisi-pmu.rst | 77 ++++ drivers/perf/hisilicon/Makefile | 3 +- drivers/perf/hisilicon/hisi_uncore_noc_pmu.c | 392 +++++++++++++++++++ 3 files changed, 471 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_noc_pmu.c diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index 3532d659f62a..102ea54fd64c 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -56,6 +56,83 @@ Example usage of perf:: $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5 $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5 +For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same +as PMU v1, but some new functions are added to the hardware. + +1. 
L3C PMU supports filtering by core/thread within the cluster which can be
+specified as a bitmap::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5
+
+This will only count the operations from core/thread 0 and 1 in this cluster.
+
+2. Tracetag allows the user to choose to count only read, write or atomic
+operations via the tt_req parameter in perf. The default value counts all
+operations. tt_req is 3 bits, 3'b100 represents read operations, 3'b101
+represents write operations, 3'b110 represents atomic store operations and
+3'b111 represents atomic non-store operations, other values are reserved::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5
+
+This will only count the read operations in this cluster.
+
+3. Datasrc allows the user to check where the data comes from. It is 5 bits.
+Some important codes are as follows:
+
+- 5'b00001: comes from L3C in this die;
+- 5'b01000: comes from L3C in the cross-die;
+- 5'b01001: comes from L3C which is in another socket;
+- 5'b01110: comes from the local DDR;
+- 5'b01111: comes from the cross-die DDR;
+- 5'b10000: comes from cross-socket DDR;
+
+etc. It is mainly helpful to find whether the data source is nearest to the
+CPU cores. If datasrc_cfg is used on a multi-chip system, the datasrc_skt
+shall also be configured in the perf command::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/,
+  hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5
+
+4. Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
+contains several Compute Clusters (CCLs). The I/O dies are called Super I/O
+clusters (SICL) containing multiple I/O clusters (ICLs). Each CCL/ICL in the
+SoC has a unique ID. Each ID is 11 bits, including a 6-bit SCCL-ID and a
+5-bit CCL/ICL-ID. For an I/O die, the ICL-ID is one of the following:
+
+- 5'b00000: I/O_MGMT_ICL;
+- 5'b00001: Network_ICL;
+- 5'b00011: HAC_ICL;
+- 5'b10000: PCIe_ICL;
+
+5. uring_channel: UC PMU events 0x47~0x59 support filtering by tx request
+uring channel. It is 2 bits. Some important codes are as follows:
+
+- 2'b11: count the events which are sent to the uring_ext (MATA) channel;
+- 2'b01: is the same as 2'b11;
+- 2'b10: count the events which are sent to the uring (non-MATA) channel;
+- 2'b00: default value, count the events which are sent to both the uring
+  and uring_ext channels;
+
+6. ch: NoC PMU supports filtering the event counts of a certain transaction
+channel with this option. The currently supported channels are as follows:
+
+- 3'b010: Request channel
+- 3'b100: Snoop channel
+- 3'b110: Response channel
+- 3'b111: Data channel
+
+7. tt_en: NoC PMU supports counting only transactions that have tracetag set
+if this option is set. See the 2nd list for more information about tracetag.
+
+Users could configure IDs to count data coming from a specific CCL/ICL, by
+setting srcid_cmd & srcid_msk, and data destined for a specific CCL/ICL by
+setting tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the PMU
+will not check the bit when matching against the srcid_cmd/tgtid_cmd.
+
+If all of these options are disabled, the PMU works with the default values,
+does not distinguish the filter condition and ID information, and returns
+the total counter values in the PMU counters.
+
 The current driver does not support sampling. So "perf record" is
 unsupported. Also attaching to a task is unsupported as the events are all
 uncore.
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index ba35e812caab..fe195500bd98 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ + hisi_uncore_noc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c new file mode 100644 index 000000000000..bf0a6bce71c4 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + * Author: Yicong Yang + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +#define NOC_PMU_VERSION 0x1e00 +#define NOC_PMU_GLOBAL_CTRL 0x1e04 +#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0) +#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1) +#define NOC_PMU_CNT_INFO 0x1e08 +#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n) +#define NOC_PMU_EVENT_CTRL(n) (0x1e20 + 4 * (n)) +#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0) +/* + * Note channel of 0x0 will reset the counter value, so don't do it before + * we read out the counter. + */ +#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8) +#define NOC_PMU_EVENT_CTRL_EN BIT(11) +#define NOC_PMU_EVENT_COUNTER(n) (0x1e80 + 8 * (n)) + +#define NOC_PMU_NR_COUNTERS 4 +#define NOC_PMU_COUNTER_BITS 64 + +#define NOC_PMU_CH_DEFAULT 0x7 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3); + +/* Dynamic CPU hotplug state used by this PMU driver */ +static enum cpuhp_state hisi_noc_pmu_cpuhp_state; + +/* + * Tracetag filtering is not per event and all the events should keep + * the consistence. Return true if the new comer doesn't match the + * tracetag filtering configuration of the current scheduled events. 
+ */ +static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr, + struct perf_event *new) +{ + return hisi_get_tt_en(curr) == hisi_get_tt_en(new); +} + +static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(idx)); + reg &= ~NOC_PMU_EVENT_CTRL_TYPE; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(idx)); +} + +static int hisi_noc_pmu_get_event_idx(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events; + int cur_idx; + + cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters); + if (cur_idx != noc_pmu->num_counters && + !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event)) + return -EAGAIN; + + return hisi_uncore_pmu_get_event_idx(event); +} + +static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + return readq(noc_pmu->base + NOC_PMU_EVENT_COUNTER(hwc->idx)); +} + +static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, noc_pmu->base + NOC_PMU_EVENT_COUNTER(hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + reg |= NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); +} + +static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + /* We don't support interrupt, so a stub here. */ +} + +static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ +} + +static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); +} + +static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); +} + +static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu) +{ + return readl(noc_pmu->base + NOC_PMU_CNT_INFO); +} + +static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx) +{ + writel(~NOC_PMU_CNT_INFO_OVERFLOW(idx), noc_pmu->base + NOC_PMU_CNT_INFO); +} + +static void hisi_noc_pmu_enable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u32 tt_en = hisi_get_tt_en(event); + u32 ch = hisi_get_ch(event); + u32 reg; + + if (!ch) + ch = NOC_PMU_CH_DEFAULT; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + + /* + * Since tracetag filter applies to all the counters, don't touch it + * if user doesn't specify it explicitly. 
+ */ + if (tt_en) { + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg |= NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + } +} + +static void hisi_noc_pmu_disable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tt_en(event); + u32 reg; + + /* + * If we're not the last counter, don't touch the global tracetag + * configuration. + */ + if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1) + return; + + if (tt_en) { + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + } +} + +static const struct hisi_uncore_ops hisi_uncore_noc_ops = { + .write_evtype = hisi_noc_pmu_write_evtype, + .get_event_idx = hisi_noc_pmu_get_event_idx, + .read_counter = hisi_noc_pmu_read_counter, + .write_counter = hisi_noc_pmu_write_counter, + .enable_counter = hisi_noc_pmu_enable_counter, + .disable_counter = hisi_noc_pmu_disable_counter, + .enable_counter_int = hisi_noc_pmu_enable_counter_int, + .disable_counter_int = hisi_noc_pmu_disable_counter_int, + .start_counters = hisi_noc_pmu_start_counters, + .stop_counters = hisi_noc_pmu_stop_counters, + .get_int_status = hisi_noc_pmu_get_int_status, + .clear_int_status = hisi_noc_pmu_clear_int_status, + .enable_filter = hisi_noc_pmu_enable_filter, + .disable_filter = hisi_noc_pmu_disable_filter, +}; + +static struct attribute *hisi_noc_pmu_format_attrs[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"), + HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_format_group = { + .name = "format", + .attrs = hisi_noc_pmu_format_attrs, +}; + +static struct attribute *hisi_noc_pmu_events_attrs[] = { + HISI_PMU_EVENT_ATTR(cycles, 0x0e), + /* Flux on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a), + HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17), + /* Buffer full duration on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19), + HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12), + /* Failure packets count on/off the ring */ + HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01), + HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09), + HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03), + HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b), + /* Flux of the ring */ + HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05), + HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_events_group = { + .name = "events", + .attrs = hisi_noc_pmu_events_attrs, +}; + +static const struct attribute_group *hisi_noc_pmu_attr_groups[] = { + &hisi_noc_pmu_format_group, + &hisi_noc_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu) +{ + hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev); + + if (noc_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n"); + + if (noc_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n"); + + if (noc_pmu->topo.sub_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n"); + + noc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(noc_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base), + "fail to remap io memory\n"); + + noc_pmu->on_cpu = -1; + noc_pmu->dev = &pdev->dev; + noc_pmu->ops 
= &hisi_uncore_noc_ops;
+	noc_pmu->pmu_events.attr_groups = hisi_noc_pmu_attr_groups;
+	noc_pmu->num_counters = NOC_PMU_NR_COUNTERS;
+	noc_pmu->counter_bits = NOC_PMU_COUNTER_BITS;
+	noc_pmu->check_event = NOC_PMU_EVENT_CTRL_TYPE;
+	noc_pmu->identifier = readl(noc_pmu->base + NOC_PMU_VERSION);
+	return 0;
+}
+
+static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+	cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node);
+}
+
+static void hisi_noc_pmu_unregister_pmu(void *pmu)
+{
+	perf_pmu_unregister(pmu);
+}
+
+static int hisi_noc_pmu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct hisi_pmu *noc_pmu;
+	char *name;
+	int ret;
+
+	noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL);
+	if (!noc_pmu)
+		return -ENOMEM;
+
+	/*
+	 * HiSilicon Uncore PMU framework needs to get common hisi_pmu device
+	 * from device's drvdata.
+	 */
+	platform_set_drvdata(pdev, noc_pmu);
+
+	ret = hisi_noc_pmu_dev_init(pdev, noc_pmu);
+	if (ret)
+		return ret;
+
+	ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node);
+	if (ret)
+		return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n");
+
+	ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance,
+				       &noc_pmu->node);
+	if (ret)
+		return ret;
+
+	hisi_pmu_init(noc_pmu, THIS_MODULE);
+
+	name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d",
+			      noc_pmu->topo.scl_id, noc_pmu->topo.index_id,
+			      noc_pmu->topo.sub_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = perf_pmu_register(&noc_pmu->pmu, name, -1);
+	if (ret)
+		return dev_err_probe(dev, ret, "Fail to register PMU\n");
+
+	return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu,
+					&noc_pmu->pmu);
+}
+
+const struct acpi_device_id hisi_noc_pmu_ids[] = {
+	{ "HISI04E0", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids);
+
+static struct platform_driver hisi_noc_pmu_driver = {
+	.driver = {
+		.name = "hisi_noc_pmu",
+		.acpi_match_table = hisi_noc_pmu_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_noc_pmu_probe,
+};
+
+static int __init hisi_noc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret < 0) {
+		pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret);
+		return ret;
+	}
+	hisi_noc_pmu_cpuhp_state = ret;
+
+	ret = platform_driver_register(&hisi_noc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+
+	return ret;
+}
+module_init(hisi_noc_pmu_module_init);
+
+static void __exit hisi_noc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_noc_pmu_driver);
+	cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state);
+}
+module_exit(hisi_noc_pmu_module_exit);
+
+MODULE_IMPORT_NS(HISI_PMU);
+MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yicong Yang ");
-- 
Gitee

From 913502f310dbf2d78c3abcf35d51ef4c57913e5a Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Fri, 11 Apr 2025 19:14:08 +0800
Subject: [PATCH 057/173] drivers/perf: hisi: Add support for HiSilicon MN PMU
 driver

MN (Miscellaneous Node) is a hybrid node in ARM CHI. It broadcasts the
following two types of requests: DVM operations and PCIe configuration.
MN PMU devices exist on both SCCL and SICL, so we name the MN PMU
driver after the SCL (Super cluster) ID. The MN PMU driver uses the
HiSilicon uncore PMU framework. Only the event parameter is supported.
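Example usage with perf, assuming an MN PMU probed on super cluster 3
with index 0 (the hisi_scl3_mn0 instance name is hypothetical; it
follows the hisi_scl<scl-id>_mn<index-id> naming scheme used by the
driver below, and req_dvmop_num is one of the events it exposes):

	$# perf stat -a -e hisi_scl3_mn0/req_dvmop_num/ sleep 5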
Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/Makefile | 2 +- drivers/perf/hisilicon/hisi_uncore_mn_pmu.c | 355 ++++++++++++++++++++ 2 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_mn_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index fe195500bd98..cdbe62b5aaec 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ - hisi_uncore_noc_pmu.o + hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c new file mode 100644 index 000000000000..bad06cac3f8a --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC MN uncore Hardware event counters support + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by MN PMU */ +static enum cpuhp_state hisi_mn_pmu_online; + +/* MN register definition */ +#define HISI_MN_DYNAMIC_CTRL 0x400 +#define HISI_MN_DYNAMIC_CTRL_EN BIT(0) +#define HISI_MN_PERF_CTRL_REG 0x408 +#define HISI_MN_PERF_CTRL_EN BIT(6) +#define HISI_MN_INT_MASK_REG 0x800 +#define HISI_MN_INT_STATUS_REG 0x808 +#define HISI_MN_INT_CLEAR_REG 0x80C +#define HISI_MN_EVENT_CTRL_REG 0x1C00 +#define HISI_MN_VERSION_REG 0x1C04 +#define HISI_MN_EVTYPE_REGn(n) (0x1d00 + (n) * 4) +#define HISI_MN_EVTYPE_MASK GENMASK(7, 0) +#define HISI_MN_CNTR_REGn(n) (0x1e00 + (n) * 8) + +#define HISI_MN_NR_COUNTERS 4 +#define HISI_MN_COUNTER_BITS 48 +#define HISI_MN_TIMEOUT_US 500U + +/* + * Each event request takes a certain amount of time to complete. If + * we counting the latency related event, we need to wait for the all + * requests complete. Otherwise, the value of counter is slightly larger. + */ +static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu) +{ + int ret; + u32 val; + + val = readl(mn_pmu->base + HISI_MN_DYNAMIC_CTRL); + val |= HISI_MN_DYNAMIC_CTRL_EN; + writel(val, mn_pmu->base + HISI_MN_DYNAMIC_CTRL); + + ret = readl_poll_timeout_atomic(mn_pmu->base + HISI_MN_DYNAMIC_CTRL, + val, !(val & HISI_MN_DYNAMIC_CTRL_EN), + 1, HISI_MN_TIMEOUT_US); + if (ret) + dev_warn(mn_pmu->dev, "Counter flush timeout\n"); +} + +static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + return readq(mn_pmu->base + HISI_MN_CNTR_REGn(hwc->idx)); +} + +static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(hwc->idx)); +} + +static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type) +{ + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. 
+ */ + val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(idx / 4)); + val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(idx / 4)); +} + +static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_PERF_CTRL_REG); + val |= HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + HISI_MN_PERF_CTRL_REG); +} + +static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_PERF_CTRL_REG); + val &= ~HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + HISI_MN_PERF_CTRL_REG); + + hisi_mn_pmu_counter_flush(mn_pmu); +} + +static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_EVENT_CTRL_REG); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_EVENT_CTRL_REG); +} + +static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_EVENT_CTRL_REG); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_EVENT_CTRL_REG); +} + +static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_INT_MASK_REG); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_INT_MASK_REG); +} + +static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_INT_MASK_REG); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_INT_MASK_REG); +} + +static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu) +{ + return readl(mn_pmu->base + HISI_MN_INT_STATUS_REG); +} + +static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx) +{ + writel(BIT(idx), mn_pmu->base + HISI_MN_INT_CLEAR_REG); +} + +static struct attribute *hisi_mn_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_format_group = { + .name = "format", + .attrs = hisi_mn_pmu_format_attr, +}; + +static struct attribute *hisi_mn_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00), + HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01), + HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02), + HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03), + HISI_PMU_EVENT_ATTR(req_retry_num, 0x04), + HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05), + HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06), + HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07), + HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A), + HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C), + HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E), + HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80), + HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83), + HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85), + HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_events_group = { + .name = "events", + .attrs = hisi_mn_pmu_events_attr, +}; + +static const struct 
attribute_group *hisi_mn_pmu_attr_groups[] = { + &hisi_mn_pmu_format_group, + &hisi_mn_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_mn_ops = { + .write_evtype = hisi_mn_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_mn_pmu_start_counters, + .stop_counters = hisi_mn_pmu_stop_counters, + .enable_counter = hisi_mn_pmu_enable_counter, + .disable_counter = hisi_mn_pmu_disable_counter, + .enable_counter_int = hisi_mn_pmu_enable_counter_int, + .disable_counter_int = hisi_mn_pmu_disable_counter_int, + .write_counter = hisi_mn_pmu_write_counter, + .read_counter = hisi_mn_pmu_read_counter, + .get_int_status = hisi_mn_pmu_get_int_status, + .clear_int_status = hisi_mn_pmu_clear_int_status, +}; + +static int hisi_mn_pmu_dev_init(struct platform_device *pdev, + struct hisi_pmu *mn_pmu) +{ + int ret; + + hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev); + + if (mn_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN scl id\n"); + + if (mn_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN index id\n"); + + mn_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mn_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base), + "Failed to ioremap resource\n"); + + ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev); + if (ret) + return ret; + + mn_pmu->on_cpu = -1; + mn_pmu->dev = &pdev->dev; + mn_pmu->ops = &hisi_uncore_mn_ops; + mn_pmu->pmu_events.attr_groups = hisi_mn_pmu_attr_groups; + mn_pmu->check_event = HISI_MN_EVTYPE_MASK; + mn_pmu->num_counters = HISI_MN_NR_COUNTERS; + mn_pmu->counter_bits = HISI_MN_COUNTER_BITS; + mn_pmu->identifier = readl(mn_pmu->base + HISI_MN_VERSION_REG); + return 0; +} + +static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node); +} + +static void hisi_mn_pmu_unregister(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_mn_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *mn_pmu; + char *name; + int ret; + + mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL); + if (!mn_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, mn_pmu); + + ret = hisi_mn_pmu_dev_init(pdev, mn_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d", + mn_pmu->topo.scl_id, mn_pmu->topo.index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(mn_pmu, THIS_MODULE); + + ret = perf_pmu_register(&mn_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n"); + + return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu); +} + +static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = { + { "HISI0222", }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match); + +static struct platform_driver hisi_mn_pmu_driver = { + .driver = { + .name = "hisi_mn_pmu", + .acpi_match_table = hisi_mn_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. 
+ */ + .suppress_bind_attrs = true, + }, + .probe = hisi_mn_pmu_probe, +}; + +static int __init hisi_mn_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret); + return ret; + } + hisi_mn_pmu_online = ret; + + ret = platform_driver_register(&hisi_mn_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_mn_pmu_online); + + return ret; +} +module_init(hisi_mn_pmu_module_init); + +static void __exit hisi_mn_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_mn_pmu_driver); + cpuhp_remove_multi_state(hisi_mn_pmu_online); +} +module_exit(hisi_mn_pmu_module_exit); + +MODULE_IMPORT_NS(HISI_PMU); +MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Junhao He "); -- Gitee From 1df579e9c086c8f455113361482df28fda62ded4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Mar 2023 15:50:26 -0400 Subject: [PATCH 058/173] ARM: perf: Allow the use of the PMUv3 driver on 32bit ARM [Upstream commit 009d6dc87a568db62290b8dc0a517b612217b6da] The only thing stopping the PMUv3 driver from compiling on 32bit is the lack of defined system registers names and the handful of required helpers. This is easily solved by providing the sysreg accessors and updating the Kconfig entry. Signed-off-by: Marc Zyngier Co-developed-by: Zaid Al-Bassam Signed-off-by: Zaid Al-Bassam Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230317195027.3746949-8-zalbassam@google.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm/include/asm/arm_pmuv3.h | 247 +++++++++++++++++++++++++++++++ arch/arm64/kernel/perf_event.c | 5 +- drivers/perf/Kconfig | 10 ++ 3 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 arch/arm/include/asm/arm_pmuv3.h diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h new file mode 100644 index 000000000000..78d3d4b82c6c --- /dev/null +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Ltd. 
+ */ + +#ifndef __ASM_PMUV3_H +#define __ASM_PMUV3_H + +#include +#include + +#define PMCCNTR __ACCESS_CP15_64(0, c9) + +#define PMCR __ACCESS_CP15(c9, 0, c12, 0) +#define PMCNTENSET __ACCESS_CP15(c9, 0, c12, 1) +#define PMCNTENCLR __ACCESS_CP15(c9, 0, c12, 2) +#define PMOVSR __ACCESS_CP15(c9, 0, c12, 3) +#define PMSELR __ACCESS_CP15(c9, 0, c12, 5) +#define PMCEID0 __ACCESS_CP15(c9, 0, c12, 6) +#define PMCEID1 __ACCESS_CP15(c9, 0, c12, 7) +#define PMXEVTYPER __ACCESS_CP15(c9, 0, c13, 1) +#define PMXEVCNTR __ACCESS_CP15(c9, 0, c13, 2) +#define PMUSERENR __ACCESS_CP15(c9, 0, c14, 0) +#define PMINTENSET __ACCESS_CP15(c9, 0, c14, 1) +#define PMINTENCLR __ACCESS_CP15(c9, 0, c14, 2) +#define PMMIR __ACCESS_CP15(c9, 0, c14, 6) +#define PMCCFILTR __ACCESS_CP15(c14, 0, c15, 7) + +#define PMEVCNTR0 __ACCESS_CP15(c14, 0, c8, 0) +#define PMEVCNTR1 __ACCESS_CP15(c14, 0, c8, 1) +#define PMEVCNTR2 __ACCESS_CP15(c14, 0, c8, 2) +#define PMEVCNTR3 __ACCESS_CP15(c14, 0, c8, 3) +#define PMEVCNTR4 __ACCESS_CP15(c14, 0, c8, 4) +#define PMEVCNTR5 __ACCESS_CP15(c14, 0, c8, 5) +#define PMEVCNTR6 __ACCESS_CP15(c14, 0, c8, 6) +#define PMEVCNTR7 __ACCESS_CP15(c14, 0, c8, 7) +#define PMEVCNTR8 __ACCESS_CP15(c14, 0, c9, 0) +#define PMEVCNTR9 __ACCESS_CP15(c14, 0, c9, 1) +#define PMEVCNTR10 __ACCESS_CP15(c14, 0, c9, 2) +#define PMEVCNTR11 __ACCESS_CP15(c14, 0, c9, 3) +#define PMEVCNTR12 __ACCESS_CP15(c14, 0, c9, 4) +#define PMEVCNTR13 __ACCESS_CP15(c14, 0, c9, 5) +#define PMEVCNTR14 __ACCESS_CP15(c14, 0, c9, 6) +#define PMEVCNTR15 __ACCESS_CP15(c14, 0, c9, 7) +#define PMEVCNTR16 __ACCESS_CP15(c14, 0, c10, 0) +#define PMEVCNTR17 __ACCESS_CP15(c14, 0, c10, 1) +#define PMEVCNTR18 __ACCESS_CP15(c14, 0, c10, 2) +#define PMEVCNTR19 __ACCESS_CP15(c14, 0, c10, 3) +#define PMEVCNTR20 __ACCESS_CP15(c14, 0, c10, 4) +#define PMEVCNTR21 __ACCESS_CP15(c14, 0, c10, 5) +#define PMEVCNTR22 __ACCESS_CP15(c14, 0, c10, 6) +#define PMEVCNTR23 __ACCESS_CP15(c14, 0, c10, 7) +#define PMEVCNTR24 __ACCESS_CP15(c14, 0, c11, 0) +#define PMEVCNTR25 __ACCESS_CP15(c14, 0, c11, 1) +#define PMEVCNTR26 __ACCESS_CP15(c14, 0, c11, 2) +#define PMEVCNTR27 __ACCESS_CP15(c14, 0, c11, 3) +#define PMEVCNTR28 __ACCESS_CP15(c14, 0, c11, 4) +#define PMEVCNTR29 __ACCESS_CP15(c14, 0, c11, 5) +#define PMEVCNTR30 __ACCESS_CP15(c14, 0, c11, 6) + +#define PMEVTYPER0 __ACCESS_CP15(c14, 0, c12, 0) +#define PMEVTYPER1 __ACCESS_CP15(c14, 0, c12, 1) +#define PMEVTYPER2 __ACCESS_CP15(c14, 0, c12, 2) +#define PMEVTYPER3 __ACCESS_CP15(c14, 0, c12, 3) +#define PMEVTYPER4 __ACCESS_CP15(c14, 0, c12, 4) +#define PMEVTYPER5 __ACCESS_CP15(c14, 0, c12, 5) +#define PMEVTYPER6 __ACCESS_CP15(c14, 0, c12, 6) +#define PMEVTYPER7 __ACCESS_CP15(c14, 0, c12, 7) +#define PMEVTYPER8 __ACCESS_CP15(c14, 0, c13, 0) +#define PMEVTYPER9 __ACCESS_CP15(c14, 0, c13, 1) +#define PMEVTYPER10 __ACCESS_CP15(c14, 0, c13, 2) +#define PMEVTYPER11 __ACCESS_CP15(c14, 0, c13, 3) +#define PMEVTYPER12 __ACCESS_CP15(c14, 0, c13, 4) +#define PMEVTYPER13 __ACCESS_CP15(c14, 0, c13, 5) +#define PMEVTYPER14 __ACCESS_CP15(c14, 0, c13, 6) +#define PMEVTYPER15 __ACCESS_CP15(c14, 0, c13, 7) +#define PMEVTYPER16 __ACCESS_CP15(c14, 0, c14, 0) +#define PMEVTYPER17 __ACCESS_CP15(c14, 0, c14, 1) +#define PMEVTYPER18 __ACCESS_CP15(c14, 0, c14, 2) +#define PMEVTYPER19 __ACCESS_CP15(c14, 0, c14, 3) +#define PMEVTYPER20 __ACCESS_CP15(c14, 0, c14, 4) +#define PMEVTYPER21 __ACCESS_CP15(c14, 0, c14, 5) +#define PMEVTYPER22 __ACCESS_CP15(c14, 0, c14, 6) +#define PMEVTYPER23 __ACCESS_CP15(c14, 0, c14, 7) +#define PMEVTYPER24 
__ACCESS_CP15(c14, 0, c15, 0) +#define PMEVTYPER25 __ACCESS_CP15(c14, 0, c15, 1) +#define PMEVTYPER26 __ACCESS_CP15(c14, 0, c15, 2) +#define PMEVTYPER27 __ACCESS_CP15(c14, 0, c15, 3) +#define PMEVTYPER28 __ACCESS_CP15(c14, 0, c15, 4) +#define PMEVTYPER29 __ACCESS_CP15(c14, 0, c15, 5) +#define PMEVTYPER30 __ACCESS_CP15(c14, 0, c15, 6) + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(PMEVCNTR##n) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, PMEVCNTR##n) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, PMEVTYPER##n) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); +} + +static inline unsigned long read_pmmir(void) +{ + return read_sysreg(PMMIR); +} + +static inline u32 read_pmuver(void) +{ + /* PMUVers is not a signed field */ + u32 dfr0 = read_cpuid_ext(CPUID_EXT_DFR0); + + return (dfr0 >> 24) & 0xf; +} + +static inline void write_pmcr(u32 val) +{ + write_sysreg(val, PMCR); +} + +static inline u32 read_pmcr(void) +{ + return read_sysreg(PMCR); +} + +static inline void write_pmselr(u32 val) +{ + write_sysreg(val, PMSELR); +} + +static inline void write_pmccntr(u64 val) +{ + write_sysreg(val, PMCCNTR); +} + +static inline u64 read_pmccntr(void) +{ + return read_sysreg(PMCCNTR); +} + +static inline void write_pmxevcntr(u32 val) +{ + write_sysreg(val, PMXEVCNTR); +} + +static inline u32 read_pmxevcntr(void) +{ + return read_sysreg(PMXEVCNTR); +} + +static inline void write_pmxevtyper(u32 val) +{ + write_sysreg(val, PMXEVTYPER); +} + +static inline void write_pmcntenset(u32 val) +{ + write_sysreg(val, PMCNTENSET); +} + +static inline void write_pmcntenclr(u32 val) +{ + write_sysreg(val, PMCNTENCLR); +} + +static inline void write_pmintenset(u32 val) +{ + write_sysreg(val, PMINTENSET); +} + +static inline void write_pmintenclr(u32 val) +{ + write_sysreg(val, PMINTENCLR); +} + +static inline void write_pmccfiltr(u32 val) +{ + write_sysreg(val, PMCCFILTR); +} + +static inline void write_pmovsclr(u32 val) +{ + write_sysreg(val, PMOVSR); +} + +static inline u32 read_pmovsclr(void) +{ + return read_sysreg(PMOVSR); +} + +static inline void write_pmuserenr(u32 val) +{ + write_sysreg(val, PMUSERENR); +} + +static inline u32 read_pmceid0(void) +{ + return read_sysreg(PMCEID0); +} + +static inline u32 read_pmceid1(void) +{ + return read_sysreg(PMCEID1); +} + +static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} +static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) +{ + return false; +} + +/* PMU Version in DFR Register */ +#define ARMV8_PMU_DFR_VER_NI 0 +#define ARMV8_PMU_DFR_VER_V3P4 0x5 +#define ARMV8_PMU_DFR_VER_V3P5 0x6 +#define ARMV8_PMU_DFR_VER_IMP_DEF 0xF + +static inline bool pmuv3_implemented(int pmuver) +{ + return !(pmuver == ARMV8_PMU_DFR_VER_IMP_DEF || + pmuver == ARMV8_PMU_DFR_VER_NI); +} + +static inline bool is_pmuv3p4(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P4; +} + +static inline bool is_pmuv3p5(int pmuver) +{ + return pmuver >= ARMV8_PMU_DFR_VER_V3P5; +} + +#endif diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f0f90d446021..0b461bc3d06c 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -351,10 +351,13 @@ static struct attribute_group 
armv8_pmuv3_format_attr_group = {
  * We unconditionally enable ARMv8.5-PMU long event counter support
  * (64-bit events) where supported. Indicate if this arm_pmu has long
  * event counter support.
+ *
+ * On AArch32, long counters make no sense (you can't access the top
+ * bits), so we only enable this on AArch64.
  */
 static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
 {
-	return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5);
+	return (IS_ENABLED(CONFIG_ARM64) && (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5));
 }
 
 /*
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index ac01b8887189..6d2f2cc86379 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -69,6 +69,16 @@ config ARM_SMMU_V3_PMU
 	  through the SMMU and allow the resulting information to be filtered
 	  based on the Stream ID of the corresponding master.
 
+config ARM_PMUV3
+	depends on HW_PERF_EVENTS && ((ARM && CPU_V7) || ARM64)
+	bool "ARM PMUv3 support" if !ARM64
+	default ARM64
+	help
+	  Say y if you want to use the ARM performance monitor unit (PMU)
+	  version 3. The PMUv3 is the CPU performance monitors on ARMv8
+	  (aarch32 and aarch64) systems that implement the PMUv3
+	  architecture.
+
 config ARM_DSU_PMU
 	tristate "ARM DynamIQ Shared Unit (DSU) PMU"
 	depends on ARM64
-- 
Gitee

From ac7c08dac704868e1bc9e0440e31e4b51857ef1d Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:31:48 +0800
Subject: [PATCH 059/173] perf mem: Search event name with more flexible path

[Upstream commit f9f16dfbe76e63ba9aec68055c08242b09be297e]

The perf tool searches for a memory event name under the folder
'/sys/devices/cpu/events/'; this limits the selection of a memory
profiling event to events that live under this folder. Thus it's
impossible to use any other event for memory profiling if it is not
under this specific folder, e.g. the Arm SPE hardware event is not
located in '/sys/devices/cpu/events/' so it cannot be enabled for
memory profiling.

This patch changes the search folder from '/sys/devices/cpu/events/'
to '/sys/devices', which gives the flexibility to find events that can
be used for memory profiling.
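For example, the generic events keep resolving to the same sysfs
locations as before, while an architecture-specific entry can now name
any PMU directory (the SPE line below is illustrative; it matches the
arm64 support added later in this series):

  "cpu/events/mem-loads"  ->  /sys/devices/cpu/events/mem-loads
  "arm_spe_0"             ->  /sys/devices/arm_spe_0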
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/mem-events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 3d391583f2ae..a40d783aac8c 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -18,8 +18,8 @@ unsigned int perf_mem_events__loads_ldlat = 30; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"), - E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), + E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), }; #undef E @@ -93,7 +93,7 @@ int perf_mem_events__init(void) struct perf_mem_event *e = &perf_mem_events[j]; struct stat st; - scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s", + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name); if (!stat(path, &st)) -- Gitee From cff1dc8fb3acc809092840f687591e7c4f84802c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 7 May 2020 15:06:04 -0700 Subject: [PATCH 060/173] perf c2c: Fix 'perf c2c record -e list' to show the default events used [Upstream commit b027cc6fdf1bb41f4572e99e96ff95bbf8cb5783] When the event is passed as list, the default events should be listed as per 'perf mem record -e list'. Previous behavior is: $ perf c2c record -e list failed: event 'list' not found, use '-e list' to get list of available events Usage: perf c2c record [] [] or: perf c2c record [] -- [] -e, --event event selector. Use 'perf mem record -e list' to list available events $ New behavior: $ perf c2c record -e list ldlat-loads : available ldlat-stores : available v3: is a rebase. v2: addresses review comments by Jiri Olsa. https://lore.kernel.org/lkml/20191127081844.GH32367@krava/ Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200507220604.3391-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/Documentation/perf-c2c.txt | 2 +- tools/perf/builtin-c2c.c | 9 ++++++++- tools/perf/builtin-mem.c | 24 +++++++----------------- tools/perf/util/mem-events.c | 15 +++++++++++++++ tools/perf/util/mem-events.h | 2 ++ 5 files changed, 33 insertions(+), 19 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index e6150f21267d..6ab03e04c925 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -40,7 +40,7 @@ RECORD OPTIONS -------------- -e:: --event=:: - Select the PMU event. Use 'perf mem record -e list' + Select the PMU event. Use 'perf c2c record -e list' to list available events. 
-v:: diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index cd25ba49195c..3a2d1ac55544 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2878,8 +2878,15 @@ static int parse_record_events(const struct option *opt, { bool *event_set = (bool *) opt->value; + if (!strcmp(str, "list")) { + perf_mem_events__list(); + exit(0); + } + if (perf_mem_events__parse(str)) + exit(-1); + *event_set = true; - return perf_mem_events__parse(str); + return 0; } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index a0d93d7e0f63..7b8c7eb65875 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -38,26 +38,16 @@ static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { struct perf_mem *mem = *(struct perf_mem **)opt->value; - int j; - if (strcmp(str, "list")) { - if (!perf_mem_events__parse(str)) { - mem->operation = 0; - return 0; - } - exit(-1); + if (!strcmp(str, "list")) { + perf_mem_events__list(); + exit(0); } + if (perf_mem_events__parse(str)) + exit(-1); - for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; - - fprintf(stderr, "%-13s%-*s%s\n", - e->tag, - verbose > 0 ? 25 : 0, - verbose > 0 ? perf_mem_events__name(j) : "", - e->supported ? ": available" : ""); - } - exit(0); + mem->operation = 0; + return 0; } static const char * const __usage[] = { diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index a40d783aac8c..2c6e7b5c327a 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -103,6 +103,21 @@ int perf_mem_events__init(void) return found ? 0 : -ENOENT; } +void perf_mem_events__list(void) +{ + int j; + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + fprintf(stderr, "%-13s%-*s%s\n", + e->tag, + verbose > 0 ? 25 : 0, + verbose > 0 ? perf_mem_events__name(j) : "", + e->supported ? ": available" : ""); + } +} + static const char * const tlb_access[] = { "N/A", "HIT", diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index f1389bdae7bf..904dad34f7f7 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -39,6 +39,8 @@ int perf_mem_events__init(void); char *perf_mem_events__name(int i); +void perf_mem_events__list(void); + struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -- Gitee From 0ed59938cb7d482acbec7b52972ccc2b70d1607c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:49 +0800 Subject: [PATCH 061/173] perf mem: Introduce weak function perf_mem_events__ptr() [Upstream commit eaf6aaeec5fa301c0eb8ae92962909b15d075e5f] Different architectures might use different event or different event parameters for memory profiling, this patch introduces a weak perf_mem_events__ptr() function which allows to return back a architecture specific memory event. Since the variable 'perf_mem_events' can be only accessed by the perf_mem_events__ptr() function, mark the variable as 'static', this allows the architectures to define its own memory event array. 
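As a sketch, an architecture override then boils down to a file-local
event table plus a strong definition of the accessor (illustrative
only; the arm64 version added later in this series follows this shape):

  static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
  	/* arch-specific tag/name/sysfs_name triplets */
  };

  struct perf_mem_event *perf_mem_events__ptr(int i)
  {
  	if (i >= PERF_MEM_EVENTS__MAX)
  		return NULL;

  	return &perf_mem_events[i];
  }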
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 18 ++++++++++++------ tools/perf/builtin-mem.c | 21 ++++++++++++++------- tools/perf/util/mem-events.c | 26 +++++++++++++++++++------- tools/perf/util/mem-events.h | 2 +- 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3a2d1ac55544..c34916f4ac1a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2905,6 +2905,7 @@ static int perf_c2c__record(int argc, const char **argv) int ret; bool all_user = false, all_kernel = false; bool event_set = false; + struct perf_mem_event *e; struct option options[] = { OPT_CALLBACK('e', "event", &event_set, "event", "event selector. Use 'perf mem record -e list' to list available events", @@ -2932,11 +2933,15 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "record"; if (!event_set) { - perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; - perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; } - if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + if (e->record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; @@ -2944,12 +2949,13 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "--sample-cpu"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - if (!perf_mem_events[j].record) + e = perf_mem_events__ptr(j); + if (!e->record) continue; - if (!perf_mem_events[j].supported) { + if (!e->supported) { pr_err("failed: event '%s' not supported\n", - perf_mem_events[j].name); + perf_mem_events__name(j)); free(rec_argv); return -1; } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 7b8c7eb65875..82bffc7e3abb 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -64,6 +64,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) const char **rec_argv; int ret; bool all_user = false, all_kernel = false; + struct perf_mem_event *e; struct option options[] = { OPT_CALLBACK('e', "event", &mem, "event", "event selector. 
use 'perf mem record -e list' to list available events", @@ -86,13 +87,18 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) - perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_LOAD) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + } - if (mem->operation & MEM_OPERATION_STORE) - perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (mem->operation & MEM_OPERATION_STORE) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } - if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + if (e->record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; @@ -101,10 +107,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "--phys-data"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - if (!perf_mem_events[j].record) + e = perf_mem_events__ptr(j); + if (!e->record) continue; - if (!perf_mem_events[j].supported) { + if (!e->supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(j)); free(rec_argv); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 2c6e7b5c327a..ffe605711425 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -17,7 +17,7 @@ unsigned int perf_mem_events__loads_ldlat = 30; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } -struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), }; @@ -28,19 +28,31 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { static char mem_loads_name[100]; static bool mem_loads_name__init; +struct perf_mem_event * __weak perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + char * __weak perf_mem_events__name(int i) { + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (!e) + return NULL; + if (i == PERF_MEM_EVENTS__LOAD) { if (!mem_loads_name__init) { mem_loads_name__init = true; scnprintf(mem_loads_name, sizeof(mem_loads_name), - perf_mem_events[i].name, - perf_mem_events__loads_ldlat); + e->name, perf_mem_events__loads_ldlat); } return mem_loads_name; } - return (char *)perf_mem_events[i].name; + return (char *)e->name; } int perf_mem_events__parse(const char *str) @@ -61,7 +73,7 @@ int perf_mem_events__parse(const char *str) while (tok) { for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); if (strstr(e->tag, tok)) e->record = found = true; @@ -90,7 +102,7 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { char path[PATH_MAX]; - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); struct stat st; scnprintf(path, PATH_MAX, "%s/devices/%s", @@ -108,7 +120,7 @@ void perf_mem_events__list(void) int j; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = &perf_mem_events[j]; + struct perf_mem_event *e = perf_mem_events__ptr(j); fprintf(stderr, "%-13s%-*s%s\n", e->tag, diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 904dad34f7f7..726a9c8103e4 100644 --- a/tools/perf/util/mem-events.h +++ 
b/tools/perf/util/mem-events.h @@ -31,13 +31,13 @@ enum { PERF_MEM_EVENTS__MAX, }; -extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; extern unsigned int perf_mem_events__loads_ldlat; int perf_mem_events__parse(const char *str); int perf_mem_events__init(void); char *perf_mem_events__name(int i); +struct perf_mem_event *perf_mem_events__ptr(int i); void perf_mem_events__list(void); -- Gitee From a4bbc859105f31f81e4bddc813c0bb981c3055c8 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:50 +0800 Subject: [PATCH 062/173] perf mem: Support new memory event PERF_MEM_EVENTS__LOAD_STORE [Upstream commit 4ba2452cd88f39da68a6dc05fcc95e8977fd6403] On the architectures with perf memory profiling, two types of hardware events have been supported: load and store; if want to profile memory for both load and store operations, the tool will use these two events at the same time, the usage is: # perf mem record -t load,store -- uname But this cannot be applied for AUX tracing event, the same PMU event can be used to only trace memory load, or only memory store, or trace for both memory load and store. This patch introduces a new event PERF_MEM_EVENTS__LOAD_STORE, which is used to support the event which can record both memory load and store operations. When user specifies memory operation type as 'load,store', or doesn't set type so use 'load,store' as default, if the arch supports the event PERF_MEM_EVENTS__LOAD_STORE, the tool will convert the required operations to this single event; otherwise, if the arch doesn't support PERF_MEM_EVENTS__LOAD_STORE, the tool rolls back to enable both events PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE, which keeps the same behaviour with before. Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-mem.c | 24 ++++++++++++++++++------ tools/perf/util/mem-events.c | 13 ++++++++++++- tools/perf/util/mem-events.h | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 82bffc7e3abb..5ad086223eb5 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -87,14 +87,26 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; - if (mem->operation & MEM_OPERATION_LOAD) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); - e->record = true; - } + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); - if (mem->operation & MEM_OPERATION_STORE) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + /* + * The load and store operations are required, use the event + * PERF_MEM_EVENTS__LOAD_STORE if it is supported. 
+ */ + if (e->tag && + (mem->operation & MEM_OPERATION_LOAD) && + (mem->operation & MEM_OPERATION_STORE)) { e->record = true; + } else { + if (mem->operation & MEM_OPERATION_LOAD) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; + } + + if (mem->operation & MEM_OPERATION_STORE) { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } } e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ffe605711425..bf9084b325ac 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -20,6 +20,7 @@ unsigned int perf_mem_events__loads_ldlat = 30; static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), + E(NULL, NULL, NULL), }; #undef E @@ -75,6 +76,9 @@ int perf_mem_events__parse(const char *str) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); + if (!e->tag) + continue; + if (strstr(e->tag, tok)) e->record = found = true; } @@ -105,6 +109,13 @@ int perf_mem_events__init(void) struct perf_mem_event *e = perf_mem_events__ptr(j); struct stat st; + /* + * If the event entry isn't valid, skip initialization + * and "e->supported" will keep false. + */ + if (!e->tag) + continue; + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, e->sysfs_name); @@ -123,7 +134,7 @@ void perf_mem_events__list(void) struct perf_mem_event *e = perf_mem_events__ptr(j); fprintf(stderr, "%-13s%-*s%s\n", - e->tag, + e->tag ?: "", verbose > 0 ? 25 : 0, verbose > 0 ? perf_mem_events__name(j) : "", e->supported ? ": available" : ""); diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 726a9c8103e4..5ef178278909 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -28,6 +28,7 @@ struct mem_info { enum { PERF_MEM_EVENTS__LOAD, PERF_MEM_EVENTS__STORE, + PERF_MEM_EVENTS__LOAD_STORE, PERF_MEM_EVENTS__MAX, }; -- Gitee From a9f5547762c906c6a5cafb86bee21f3e92a1577a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:51 +0800 Subject: [PATCH 063/173] perf c2c: Support memory event PERF_MEM_EVENTS__LOAD_STORE [Upstream commit 8b8173b45a7a9709cc2597548469708a8efbd0d9] When user doesn't specify event name, perf c2c tool enables both the load and store events, and this leads to failure for opening the duplicate PMU device for AUX trace. After the memory event PERF_MEM_EVENTS__LOAD_STORE is introduced, when the user doesn't specify event name, this patch converts the required operation to PERF_MEM_EVENTS__LOAD_STORE if the arch supports it. Otherwise, the tool still rolls back to enable events PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE. 
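For example, on a platform whose memory PMU is an AUX tracer (such as
the arm64 SPE support added later in this series), the default
invocation:

  # perf c2c record -- sleep 1

now programs one combined load/store event instead of attempting to
open the same PMU device twice.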
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c34916f4ac1a..ef3a91167b1a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2933,11 +2933,20 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "record"; if (!event_set) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); - e->record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); + /* + * The load and store operations are required, use the event + * PERF_MEM_EVENTS__LOAD_STORE if it is supported. + */ + if (e->tag) { + e->record = true; + } else { + e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e->record = true; - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); - e->record = true; + e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e->record = true; + } } e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); -- Gitee From a2d4860195c3ee29f684d9be75a4635ac14783d1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:52 +0800 Subject: [PATCH 064/173] perf mem: Only initialize memory event for recording [Upstream commit 436cce00710a3f234ab6b735b5980256e773d388] It's needless to initialize memory events for reporting, this patch moves memory event initialization for only recording. Furthermore, the change allows to parse perf data on cross platforms, e.g. perf tool can report result properly even the machine doesn't support the memory events. Signed-off-by: Leo Yan Acked-by: Ian Rogers Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-mem.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 5ad086223eb5..a950c30d8565 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -77,6 +77,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) OPT_END() }; + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } + argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -440,11 +445,6 @@ int cmd_mem(int argc, const char **argv) NULL }; - if (perf_mem_events__init()) { - pr_err("failed: memory events not supported\n"); - return -1; - } - argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_KEEP_UNKNOWN); -- Gitee From 6485ad7a2f4fa19ae66645d13c3f2422a35e12e0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:53 +0800 Subject: [PATCH 065/173] perf auxtrace: Add itrace option '-M' for memory events [Upstream commit 014a771c7867fda5b40a95e1c7bc1aa5ac704c91] This patch is to add itrace option '-M' to synthesize memory event. 
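A usage sketch, asking the decoder to synthesize only memory events
from the AUX trace data:

  # perf report --itrace=M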
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.c | 4 ++++ tools/perf/util/auxtrace.h | 2 ++ 3 files changed, 7 insertions(+) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 467659bfdeec..37bbb67001a9 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -11,6 +11,7 @@ d create a debug log f synthesize first level cache events m synthesize last level cache events + M synthesize memory events t synthesize TLB events a synthesize remote access events g synthesize a call chain (use with i or x) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 185136bae6f6..036b9750f05d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1182,6 +1182,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->flc = true; synth_opts->llc = true; synth_opts->tlb = true; + synth_opts->mem = true; synth_opts->remote_access = true; if (no_sample) { @@ -1348,6 +1349,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'a': synth_opts->remote_access = true; break; + case 'M': + synth_opts->mem = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 15d24882ce40..28f0ae891558 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -82,6 +82,7 @@ enum itrace_period_type { * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events + * @mem: whether to synthesize memory events * @vm_time_correlation: perform VM Time Correlation * @vm_tm_corr_dry_run: VM Time Correlation dry-run * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments @@ -116,6 +117,7 @@ struct itrace_synth_opts { bool llc; bool tlb; bool remote_access; + bool mem; bool vm_time_correlation; bool vm_tm_corr_dry_run; char *vm_tm_corr_args; -- Gitee From 6430936cb6906bb58aff7b00e3fabc8c7626e807 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:54 +0800 Subject: [PATCH 066/173] perf mem: Support AUX trace [Upstream commit 13e5df1e3f1ba1a90944362bc57690ea1369b3b7] The 'perf mem' tool doesn't support AUX trace data so it cannot receive the hardware tracing data. On arm64, although it doesn't support PMU events for memory load and store, ARM SPE is a good candidate for memory profiling, the hardware tracer can record memory accessing operations with affiliated information (e.g. physical address and virtual address for accessing, cache levels, TLB walking, latency, etc). To allow "perf mem" tool to support AUX trace, this patch adds the AUX callbacks for session structure; make itrace memory event as default for "perf mem", this tells the AUX trace decoder to synthesize memory samples. 
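The expected flow then becomes (a sketch; it assumes an AUX-trace
memory event is available, e.g. the Arm SPE events wired up later in
this series):

  # perf mem record -- uname
  # perf mem report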
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-mem.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index a950c30d8565..ca7879ddc897 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,6 +7,7 @@ #include "perf.h" #include +#include "util/auxtrace.h" #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" @@ -255,6 +256,12 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .mem = true, /* Only enable memory event */ + .default_no_sample = true, + }; + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, @@ -266,6 +273,8 @@ static int report_raw_events(struct perf_mem *mem) if (IS_ERR(session)) return PTR_ERR(session); + session->itrace_synth_opts = &itrace_synth_opts; + if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); @@ -409,8 +418,12 @@ int cmd_mem(int argc, const char **argv) .comm = perf_event__process_comm, .lost = perf_event__process_lost, .fork = perf_event__process_fork, + .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, .namespaces = perf_event__process_namespaces, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, }, .input_name = "perf.data", -- Gitee From 81429b97b0e875ea16847e4abc87dd91171d5211 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:55 +0800 Subject: [PATCH 067/173] perf c2c: Support AUX trace [Upstream commit c825f7885178f994a2a00ca02016940d94aaed6e] This patch adds the AUX callbacks in session structure, so support AUX trace for "perf c2c" tool; make itrace memory event as default for "perf c2c", this tells the AUX trace decoder to synthesize samples and can be used for statistics. 
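The default decode is thus roughly equivalent to always passing the
itrace option added earlier in this series:

  # perf c2c report --itrace=M

so a plain 'perf c2c report' works on AUX trace data without extra
flags.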
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ef3a91167b1a..f5af579378dc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -365,6 +365,10 @@ static struct perf_c2c c2c = { .exit = perf_event__process_exit, .fork = perf_event__process_fork, .lost = perf_event__process_lost, + .attr = perf_event__process_attr, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -2717,6 +2721,12 @@ static int setup_coalesce(const char *coalesce, bool no_source) static int perf_c2c__report(int argc, const char **argv) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .mem = true, /* Only enable memory event */ + .default_no_sample = true, + }; + struct perf_session *session; struct ui_progress prog; struct perf_data data = { @@ -2796,6 +2806,8 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } + session->itrace_synth_opts = &itrace_synth_opts; + err = setup_nodes(session); if (err) { pr_err("Failed setup nodes\n"); -- Gitee From dc15ec28ee43ba99efa35b9a2efbd44ced841b28 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:56 +0800 Subject: [PATCH 068/173] perf mem: Support ARM SPE events [Upstream commit 40714c58630aaaf1eb3acc431fe206a6b36a03d6] This patch adds ARM SPE events for perf memory profiling: 'spe-load': event for only recording memory load ops; 'spe-store': event for only recording memory store ops; 'spe-ldst': event for recording memory load and store ops. 
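On an SPE-capable arm64 system (assuming the PMU is registered as
arm_spe_0), the new events then show up in the listing:

  # perf mem record -e list
  spe-load     : available
  spe-store    : available
  spe-ldst     : available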
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201106094853.21082-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/Build | 2 +- tools/perf/arch/arm64/util/mem-events.c | 37 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/arm64/util/mem-events.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 264ac3c74408..b4a627a37d11 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -8,4 +8,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ - arm-spe.o hisi-ptt.o + arm-spe.o mem-events.o hisi-ptt.o diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c new file mode 100644 index 000000000000..2a2497372671 --- /dev/null +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "map_symbol.h" +#include "mem-events.h" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("spe-load", "arm_spe_0/ts_enable=1,load_filter=1,store_filter=0,min_latency=%u/", "arm_spe_0"), + E("spe-store", "arm_spe_0/ts_enable=1,load_filter=0,store_filter=1/", "arm_spe_0"), + E("spe-ldst", "arm_spe_0/ts_enable=1,load_filter=1,store_filter=1,min_latency=%u/", "arm_spe_0"), +}; + +static char mem_ev_name[100]; + +struct perf_mem_event *perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} + +char *perf_mem_events__name(int i) +{ + struct perf_mem_event *e = perf_mem_events__ptr(i); + + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) + scnprintf(mem_ev_name, sizeof(mem_ev_name), + e->name, perf_mem_events__loads_ldlat); + else /* PERF_MEM_EVENTS__STORE */ + scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name); + + return mem_ev_name; +} -- Gitee From 039c058b6e16516fc81c1d07e6d517419f8edd13 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 1 Apr 2020 13:16:01 +0300 Subject: [PATCH 069/173] perf arm-spe: Implement ->evsel_is_auxtrace() callback [Upstream commit 508c71e3f90e4ceee7516d691355a36660a3e5bf] Implement ->evsel_is_auxtrace() callback. 
Signed-off-by: Adrian Hunter Reviewed-by: Leo Yan Cc: Andi Kleen Cc: Jiri Olsa Cc: Kim Phillips Link: http://lore.kernel.org/lkml/20200401101613.6201-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 71308e4830a3..8901a1656a41 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -712,6 +712,14 @@ static void arm_spe_free(struct perf_session *session) free(spe); } +static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); + + return evsel->core.attr.type == spe->pmu_type; +} + static const char * const arm_spe_info_fmts[] = { [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", }; @@ -933,6 +941,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.flush_events = arm_spe_flush; spe->auxtrace.free_events = arm_spe_free_events; spe->auxtrace.free = arm_spe_free; + spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; arm_spe_print_info(&auxtrace_info->priv[0]); -- Gitee From bffbcf86b619871823319017817a2c06a67b2b60 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:57 +0800 Subject: [PATCH 070/173] perf arm-spe: Include bitops.h for BIT() macro [Upstream commit c185f1cde46653cd0a7a1eaf461d16c462870781] Include header linux/bitops.h, directly use its BIT() macro and remove the self defined macros. Committer notes: Use BIT_ULL() instead of BIT to build on 32-bit arches as mentioned in review by Andre Przywara . I noticed the build failure when crossbuilding to arm32 from x86_64. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20201111071149.815-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index f3ac9d40cebf..05a59f2b7e87 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,6 +13,11 @@ #include "arm-spe-pkt-decoder.h" + +#define NS_FLAG BIT_ULL(63) +#define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) + + #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 -- Gitee From 6ba260554de835a54496b32801a59ee2ff73e40e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:31:59 +0800 Subject: [PATCH 071/173] perf arm-spe: Refactor payload size calculation [Upstream commit b2ded2e2e2764e502fc025f615210434f1eaa2a9] This patch defines macro to extract "sz" field from header, and renames the function payloadlen() to arm_spe_payload_len(). 
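The calculation itself is unchanged and amounts to the following (a
sketch; the upstream version wraps the field extraction in a macro):

  static int arm_spe_payload_len(unsigned char hdr)
  {
  	/* the "sz" field lives in bits [5:4]; payload is 2^sz bytes */
  	return 1 << ((hdr & 0x30) >> 4);
  }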
Signed-off-by: Leo Yan
Reviewed-by: Andre Przywara
Link: https://lore.kernel.org/r/20201111071149.815-4-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 05a59f2b7e87..f12c1d0df5c8 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -66,7 +66,7 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len,
 			       unsigned char ext_hdr,
 			       struct arm_spe_pkt *packet)
 {
-	size_t payload_len = arm_spe_payload_len(buf[ext_hdr]);
+	size_t payload_len = arm_spe_payload_len(buf[0]);
 
 	if (len < 1 + ext_hdr + payload_len)
 		return ARM_SPE_NEED_MORE_BYTES;
-- 
Gitee

From 489f9ccdbdb629a3b5e10bb06959751caf7c45f6 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:00 +0800
Subject: [PATCH 072/173] perf arm-spe: Refactor arm_spe_get_events()

[Upstream commit b65577baf482909225c79d8a6bad44d2a62751f4]

In the function arm_spe_get_events(), the event packet's 'index' is
assigned the payload length, but the flow is not direct: it first gets
the packet length from the return value of arm_spe_get_payload(), a
value that includes both the header length (1) and the payload length:

  int ret = arm_spe_get_payload(buf, len, packet);

and then subtracts the header length from the packet length to finally
obtain the payload length:

  packet->index = ret - 1;

To simplify the code, this patch directly assigns the payload length
to the event packet's index, and at the end calls
arm_spe_get_payload() to return the payload value.

Signed-off-by: Leo Yan
Reviewed-by: Andre Przywara
Link: https://lore.kernel.org/r/20201111071149.815-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index f12c1d0df5c8..9fb847e8e7cc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -127,7 +127,7 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len,
 	 */
 	packet->index = arm_spe_payload_len(buf[0]);
 
-	return arm_spe_get_payload(buf, len, 0, packet);
+	return arm_spe_get_payload(buf, len, packet);
 }
 
 static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
-- 
Gitee

From 4fd3bdc440695cab6f2533dd2d9ff1b9f46b4385 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:01 +0800
Subject: [PATCH 073/173] perf arm-spe: Fix packet length handling

[Upstream commit 0a04244cabc5560ce1e08555e8712a4cd20ab6ce]

When processing address packets and counter packets, if the packet
contains an extended header, the decoder fails to account for the
extra header byte in the packet length calculation and thus returns
the wrong packet length.

To correct the packet length calculation, one possible fix is to
simply add an extra 1 for the extended header, but this would spread
duplicated code across the flows for processing address packets and
counter packets. Alternatively, we can refine the function
arm_spe_get_payload() to support not only the short header but also
the extended header, and rely on it for the packet length calculation.
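Concretely, a packet with an extended header is laid out as:

  byte 0        extended header
  byte 1        header, carrying the "sz" payload-size field
  byte 2...     payload

i.e. the packet occupies 1 + ext_hdr + payload_len bytes in total, and
the size field must be read from buf[ext_hdr] rather than buf[0].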
So this patch refactors the function arm_spe_get_payload() with a new
argument 'ext_hdr' to support the extended header; the packet
processing flows can invoke this function to unify the packet length
calculation.

Signed-off-by: Leo Yan
Reviewed-by: Andre Przywara
Link: https://lore.kernel.org/r/20201111071149.815-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 9fb847e8e7cc..57de0e024251 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -66,7 +66,7 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len,
 			       unsigned char ext_hdr,
 			       struct arm_spe_pkt *packet)
 {
-	size_t payload_len = arm_spe_payload_len(buf[0]);
+	size_t payload_len = arm_spe_payload_len(buf[ext_hdr]);
 
 	if (len < 1 + ext_hdr + payload_len)
 		return ARM_SPE_NEED_MORE_BYTES;
@@ -127,7 +127,7 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len,
 	 */
 	packet->index = arm_spe_payload_len(buf[0]);
 
-	return arm_spe_get_payload(buf, len, packet);
+	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
@@ -141,7 +141,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len,
 			       struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_CONTEXT;
-	packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]);
+	packet->index = buf[0] & 0x3;
 
 	return arm_spe_get_payload(buf, len, 0, packet);
 }
@@ -149,7 +149,7 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
 			       struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_OP_TYPE;
-	packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]);
+	packet->index = buf[0] & 0x3;
 
 	return arm_spe_get_payload(buf, len, 0, packet);
 }
-- 
Gitee

From db07fd816cc981a8a5b10bdcef0c1955c4c39e1f Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:02 +0800
Subject: [PATCH 074/173] perf arm-spe: Refactor printing string to buffer

[Upstream commit 75eeaddd57f4a0ac89110547221df8f3757d5a6f]

When outputting strings to the decoding buffer with the function
snprintf(), the SPE decoder needs to detect if any error is returned
from snprintf() and, if so, bail out directly. If snprintf() succeeds,
it needs to update the buffer pointer and reduce the buffer length so
it can continue to output the next string into the subsequent memory
space.

This complex logic is spread across the function arm_spe_pkt_desc(),
so there is much duplicated code for detecting errors, incrementing
the buffer pointer and decrementing the buffer size. To avoid the
duplication, this patch introduces a new helper function,
arm_spe_pkt_out_string(), which wraps up this logic and is used by the
caller arm_spe_pkt_desc().

This patch makes the variable 'blen' a local variable of the function,
which allows removing unnecessary braces and improves readability.

This patch simplifies the return value of arm_spe_pkt_desc(): '0'
means success and any other value means an error has occurred. To
realize this, it relies on arm_spe_pkt_out_string()'s parameter 'err':
'err' is a cumulative value, and its final value is returned whether
the printing buffer is written once or multiple times.
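The resulting call pattern in arm_spe_pkt_desc() then looks roughly
like:

  int err = 0;

  arm_spe_pkt_out_string(&err, &buf, &blen, "EV");
  if (payload & 0x1)
  	arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN");
  ...
  return err;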
Finally, the error is handled in a central place, rather than directly bailing out in switch-cases, it returns error at the end of arm_spe_pkt_desc(). This patch changes the caller arm_spe_dump() to respect the updated return value semantics of arm_spe_pkt_desc(). Suggested-by: Dave Martin Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Reviewed-by: Dave Martin Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 299 ++++++------------ 1 file changed, 101 insertions(+), 198 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 57de0e024251..8e5fdac7a06e 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -292,203 +292,10 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } -static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, - char *buf, size_t buf_len) -{ - u64 payload = packet->payload; - int err = 0; - - arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); - - if (payload & BIT(EV_EXCEPTION_GEN)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); - if (payload & BIT(EV_RETIRED)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); - if (payload & BIT(EV_L1D_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); - if (payload & BIT(EV_L1D_REFILL)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); - if (payload & BIT(EV_TLB_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); - if (payload & BIT(EV_TLB_WALK)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); - if (payload & BIT(EV_NOT_TAKEN)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); - if (payload & BIT(EV_MISPRED)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); - if (payload & BIT(EV_LLC_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & BIT(EV_LLC_MISS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & BIT(EV_REMOTE_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); - if (payload & BIT(EV_ALIGNMENT)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); - if (payload & BIT(EV_PARTIAL_PREDICATE)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); - if (payload & BIT(EV_EMPTY_PREDICATE)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED"); - - return err; -} - -static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, - char *buf, size_t buf_len) -{ - u64 payload = packet->payload; - int err = 0; - - switch (packet->index) { - case SPE_OP_PKT_HDR_CLASS_OTHER: - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { - arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); - - /* SVE effective vector length */ - arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", - SPE_OP_PKG_SVE_EVL(payload)); - - if (payload & SPE_OP_PKT_SVE_FP) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); - if (payload & SPE_OP_PKT_SVE_PRED) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); - } else { - 
arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); - arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", - payload & SPE_OP_PKT_COND ? - "COND-SELECT" : "INSN-OTHER"); - } - break; - case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: - arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & 0x1 ? "ST" : "LD"); - - if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { - if (payload & SPE_OP_PKT_AT) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); - if (payload & SPE_OP_PKT_EXCL) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); - if (payload & SPE_OP_PKT_AR) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } - - switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { - case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: - arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: - arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: - arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: - arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); - break; - default: - break; - } - - if (SPE_OP_PKT_IS_LDST_SVE(payload)) { - /* SVE effective vector length */ - arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", - SPE_OP_PKG_SVE_EVL(payload)); - - if (payload & SPE_OP_PKT_SVE_PRED) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); - if (payload & SPE_OP_PKT_SVE_SG) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); - } - break; - case SPE_OP_PKT_HDR_CLASS_BR_ERET: - arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); - - if (payload & SPE_OP_PKT_COND) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - - if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); - - break; - default: - /* Unknown index */ - err = -1; - break; - } - - return err; -} - -static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, - char *buf, size_t buf_len) -{ - int ns, el, idx = packet->index; - int ch, pat; - u64 payload = packet->payload; - int err = 0; - - switch (idx) { - case SPE_ADDR_PKT_HDR_INDEX_INS: - case SPE_ADDR_PKT_HDR_INDEX_BRANCH: - ns = !!SPE_ADDR_PKT_GET_NS(payload); - el = SPE_ADDR_PKT_GET_EL(payload); - payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); - arm_spe_pkt_out_string(&err, &buf, &buf_len, - "%s 0x%llx el%d ns=%d", - (idx == 1) ? 
"TGT" : "PC", payload, el, ns); - break; - case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT: - arm_spe_pkt_out_string(&err, &buf, &buf_len, - "VA 0x%llx", payload); - break; - case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: - ns = !!SPE_ADDR_PKT_GET_NS(payload); - ch = !!SPE_ADDR_PKT_GET_CH(payload); - pat = SPE_ADDR_PKT_GET_PAT(payload); - payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); - arm_spe_pkt_out_string(&err, &buf, &buf_len, - "PA 0x%llx ns=%d ch=%d pat=%x", - payload, ns, ch, pat); - break; - default: - /* Unknown index */ - err = -1; - break; - } - - return err; -} - -static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, - char *buf, size_t buf_len) -{ - u64 payload = packet->payload; - const char *name = arm_spe_pkt_name(packet->type); - int err = 0; - - arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name, - (unsigned short)payload); - - switch (packet->index) { - case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT: - arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); - break; - case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT: - arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); - break; - case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT: - arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); - break; - default: - break; - } - - return err; -} - int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { - int idx = packet->index; + int ns, el, idx = packet->index; unsigned long long payload = packet->payload; const char *name = arm_spe_pkt_name(packet->type); char *buf_orig = buf; @@ -502,24 +309,120 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name); break; case ARM_SPE_EVENTS: - err = arm_spe_pkt_desc_event(packet, buf, buf_len); + arm_spe_pkt_out_string(&err, &buf, &blen, "EV"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED"); + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS"); + if (payload & 0x20) + arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL"); + if (payload & 0x40) + arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN"); + if (payload & 0x80) + arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED"); + if (idx > 1) { + if (payload & 0x100) + arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS"); + if (payload & 0x200) + arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL"); + if (payload & 0x400) + arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS"); + } break; case ARM_SPE_OP_TYPE: - err = arm_spe_pkt_desc_op_type(packet, buf, buf_len); + switch (idx) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &blen, + payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &blen, + payload & 0x1 ? 
"ST" : "LD"); + + if (payload & 0x2) { + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &blen, " AT"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &blen, " AR"); + } else if (payload & 0x4) { + arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP"); + } + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, "B"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &blen, " COND"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &blen, " IND"); + + break; + default: + /* Unknown index */ + err = -1; + break; + } break; case ARM_SPE_DATA_SOURCE: case ARM_SPE_TIMESTAMP: arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload); break; case ARM_SPE_ADDRESS: - err = arm_spe_pkt_desc_addr(packet, buf, buf_len); + switch (idx) { + case 0: + case 1: + ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &blen, + "%s 0x%llx el%d ns=%d", + (idx == 1) ? "TGT" : "PC", payload, el, ns); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, + "VA 0x%llx", payload); + break; + case 3: + ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &blen, + "PA 0x%llx ns=%d", payload, ns); + break; + default: + /* Unknown index */ + err = -1; + break; + } break; case ARM_SPE_CONTEXT: arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d", name, (unsigned long)payload, idx + 1); break; case ARM_SPE_COUNTER: - err = arm_spe_pkt_desc_counter(packet, buf, buf_len); + arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name, + (unsigned short)payload); + switch (idx) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &blen, "TOT"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE"); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT"); + break; + default: + break; + } break; default: /* Unknown packet type */ -- Gitee From 7cd253f21c700f64810e77d909b98863c3e0faf2 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:03 +0800 Subject: [PATCH 075/173] perf arm-spe: Refactor packet header parsing [Upstream commit 11695142e25e957dc3e56c29dc5f9daaf9530b10] The packet header parsing uses the hard coded values and it uses nested if-else statements. To improve the readability, this patch refactors the macros for packet header format so it removes the hard coded values. Furthermore, based on the new mask macros it reduces the nested if-else statements and changes to use the flat conditions checking, this is directive and can easily map to the descriptions in ARMv8-a architecture reference manual (ARM DDI 0487E.a), chapter 'D10.1.5 Statistical Profiling Extension protocol packet headers'. 
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 1 - tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 8e5fdac7a06e..4283ba2a6910 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -17,7 +17,6 @@ #define NS_FLAG BIT_ULL(63) #define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 9b970e7bf1e2..b62c4e4a3f33 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -56,6 +56,11 @@ struct arm_spe_pkt { #define SPE_HEADER0_COUNTER 0x98 #define SPE_HEADER1_ALIGNMENT 0x0 +#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) +#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) +#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) +#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) + #define SPE_HDR_SHORT_INDEX(h) ((h) & GENMASK_ULL(2, 0)) #define SPE_HDR_EXTENDED_INDEX(h0, h1) (((h0) & GENMASK_ULL(1, 0)) << 3 | \ SPE_HDR_SHORT_INDEX(h1)) -- Gitee From 98ce9f026e34d010dfedf764b6aa01f6aa2bdac2 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:04 +0800 Subject: [PATCH 076/173] perf arm-spe: Add new function arm_spe_pkt_desc_addr() [Upstream commit ab2aa439e4aaa3ce0fdcfa0f847aed4bf13bf353] This patch moves out the address parsing code from arm_spe_pkt_desc() and uses the new introduced function arm_spe_pkt_desc_addr() to process address packet. 
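For reference, the index values dispatched by the new function map to the output prefixes as follows, summarized from the switch cases in this patch:

	/* packet->index for ARM_SPE_ADDRESS packets:
	 *   0 - instruction virtual address, printed as "PC ..."
	 *   1 - branch target address,       printed as "TGT ..."
	 *   2 - data virtual address,        printed as "VA ..."
	 *   3 - data physical address,       printed as "PA ..."
	 */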
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 4283ba2a6910..683c76629f0b 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -291,10 +291,46 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } +static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + int ns, el, idx = packet->index; + u64 payload = packet->payload; + int err = 0; + + switch (idx) { + case 0: + case 1: + ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "%s 0x%llx el%d ns=%d", + (idx == 1) ? "TGT" : "PC", payload, el, ns); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "VA 0x%llx", payload); + break; + case 3: + ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + arm_spe_pkt_out_string(&err, &buf, &buf_len, + "PA 0x%llx ns=%d", payload, ns); + break; + default: + /* Unknown index */ + err = -1; + break; + } + + return err; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { - int ns, el, idx = packet->index; + int idx = packet->index; unsigned long long payload = packet->payload; const char *name = arm_spe_pkt_name(packet->type); char *buf_orig = buf; @@ -376,31 +412,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload); break; case ARM_SPE_ADDRESS: - switch (idx) { - case 0: - case 1: - ns = !!(packet->payload & NS_FLAG); - el = (packet->payload & EL_FLAG) >> 61; - payload &= ~(0xffULL << 56); - arm_spe_pkt_out_string(&err, &buf, &blen, - "%s 0x%llx el%d ns=%d", - (idx == 1) ? "TGT" : "PC", payload, el, ns); - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, - "VA 0x%llx", payload); - break; - case 3: - ns = !!(packet->payload & NS_FLAG); - payload &= ~(0xffULL << 56); - arm_spe_pkt_out_string(&err, &buf, &blen, - "PA 0x%llx ns=%d", payload, ns); - break; - default: - /* Unknown index */ - err = -1; - break; - } + err = arm_spe_pkt_desc_addr(packet, buf, buf_len); break; case ARM_SPE_CONTEXT: arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d", -- Gitee From 1030ecfc62ce9dbe3ee2903fdb832590db54a83f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:05 +0800 Subject: [PATCH 077/173] perf arm-spe: Refactor address packet handling [Upstream commit 09935ca7b64cfa379b6ebf2b8cdb3126e09bffab] This patch is to refactor address packet handling, it defines macros for address packet's header and payload, these macros are used by decoder and the dump flow. 
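As a brief sketch of how the new macros are meant to be used together (field positions follow the macro definitions; the fragment is illustrative rather than lifted from the patch):

	ns   = !!SPE_ADDR_PKT_GET_NS(payload);            /* bit 63 */
	el   = SPE_ADDR_PKT_GET_EL(payload);              /* bits [62:61] */
	addr = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);  /* strip tag bits [63:56] */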
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../util/arm-spe-decoder/arm-spe-decoder.c | 20 +----- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 22 +++--- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 72 ------------------- 3 files changed, 12 insertions(+), 102 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 90d575cee1b9..a5d7509d5daa 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -24,7 +24,7 @@ static u64 arm_spe_calc_ip(int index, u64 payload) { - u64 ns, el, val; + u64 ns, el; /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || @@ -45,22 +45,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Clean tags */ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); - /* - * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet" - * defines the data virtual address payload format, the top byte - * (bits [63:56]) is assigned as top-byte tag; so we only can - * retrieve address value from bits [55:0]. - * - * According to Documentation/arm64/memory.rst, if detects the - * specific pattern in bits [55:52] of payload which falls in - * the kernel space, should fixup the top byte and this allows - * perf tool to parse DSO symbol for data address correctly. - * - * For this reason, if detects the bits [55:52] is 0xf, will - * fill 0xff into the top byte. - */ - val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload); - if ((val & 0xf0ULL) == 0xf0ULL) + /* Fill highest byte if bits [48..55] is 0xff */ + if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL) payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access physical address */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 683c76629f0b..44397d89c1c2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,10 +13,6 @@ #include "arm-spe-pkt-decoder.h" - -#define NS_FLAG BIT_ULL(63) -#define EL_FLAG (BIT_ULL(62) | BIT_ULL(61)) - #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -299,22 +295,22 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, int err = 0; switch (idx) { - case 0: - case 1: - ns = !!(packet->payload & NS_FLAG); - el = (packet->payload & EL_FLAG) >> 61; - payload &= ~(0xffULL << 56); + case SPE_ADDR_PKT_HDR_INDEX_INS: + case SPE_ADDR_PKT_HDR_INDEX_BRANCH: + ns = !!SPE_ADDR_PKT_GET_NS(payload); + el = SPE_ADDR_PKT_GET_EL(payload); + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s 0x%llx el%d ns=%d", (idx == 1) ? 
"TGT" : "PC", payload, el, ns); break; - case 2: + case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "VA 0x%llx", payload); break; - case 3: - ns = !!(packet->payload & NS_FLAG); - payload &= ~(0xffULL << 56); + case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: + ns = !!SPE_ADDR_PKT_GET_NS(payload); + payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "PA 0x%llx ns=%d", payload, ns); break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index b62c4e4a3f33..f97d6840be3a 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -56,11 +56,6 @@ struct arm_spe_pkt { #define SPE_HEADER0_COUNTER 0x98 #define SPE_HEADER1_ALIGNMENT 0x0 -#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0) -#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1) -#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2) -#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3) - #define SPE_HDR_SHORT_INDEX(h) ((h) & GENMASK_ULL(2, 0)) #define SPE_HDR_EXTENDED_INDEX(h0, h1) (((h0) & GENMASK_ULL(1, 0)) << 3 | \ SPE_HDR_SHORT_INDEX(h1)) @@ -78,79 +73,12 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) #define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) -#define SPE_ADDR_PKT_GET_CH(v) (((v) & BIT_ULL(62)) >> 62) -#define SPE_ADDR_PKT_GET_PAT(v) (((v) & GENMASK_ULL(59, 56)) >> 56) #define SPE_ADDR_PKT_EL0 0 #define SPE_ADDR_PKT_EL1 1 #define SPE_ADDR_PKT_EL2 2 #define SPE_ADDR_PKT_EL3 3 -/* Context packet header */ -#define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) - -/* Counter packet header */ -#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT 0x0 -#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 -#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 - -/* Event packet payload */ -enum arm_spe_events { - EV_EXCEPTION_GEN = 0, - EV_RETIRED = 1, - EV_L1D_ACCESS = 2, - EV_L1D_REFILL = 3, - EV_TLB_ACCESS = 4, - EV_TLB_WALK = 5, - EV_NOT_TAKEN = 6, - EV_MISPRED = 7, - EV_LLC_ACCESS = 8, - EV_LLC_MISS = 9, - EV_REMOTE_ACCESS = 10, - EV_ALIGNMENT = 11, - EV_PARTIAL_PREDICATE = 17, - EV_EMPTY_PREDICATE = 18, -}; - -/* Operation packet header */ -#define SPE_OP_PKT_HDR_CLASS(h) ((h) & GENMASK_ULL(1, 0)) -#define SPE_OP_PKT_HDR_CLASS_OTHER 0x0 -#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 -#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 - -#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) - -#define SPE_OP_PKT_COND BIT(0) - -#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) -#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 -#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 -#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 -#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 - -#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) - -#define SPE_OP_PKT_AR BIT(4) -#define SPE_OP_PKT_EXCL BIT(3) -#define SPE_OP_PKT_AT BIT(2) -#define SPE_OP_PKT_ST BIT(0) - -#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) - -#define SPE_OP_PKT_SVE_SG BIT(7) -/* - * SVE effective vector length (EVL) is stored in byte 0 bits [6:4]; - * the length is rounded up to a power of two and use 32 as one step, - * so EVL calculation is: - * - * 32 * (2 ^ bits [6:4]) = 32 << (bits [6:4]) - */ -#define SPE_OP_PKG_SVE_EVL(v) (32 << (((v) & GENMASK_ULL(6, 4)) >> 4)) -#define SPE_OP_PKT_SVE_PRED BIT(2) -#define SPE_OP_PKT_SVE_FP BIT(1) - -#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & 
GENMASK_ULL(7, 1)) == 0x2) - const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- Gitee From 6f12a90e300668d6c7b375f3aff92d2956030921 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:06 +0800 Subject: [PATCH 078/173] perf arm_spe: Fixup top byte for data virtual address [Upstream commit 5513ddaf103c62dd1eabe9403c0a8d9f810492dc] To establish a valid address from the address packet payload and finally the address value can be used for parsing data symbol in DSO, current code uses 0xff to replace the tag in the top byte of data virtual address. So far the code only fixups top byte for the memory layouts with 4KB pages, it misses to support memory layouts with 64KB pages. This patch adds the conditions for checking bits [55:52] are 0xf, if detects the pattern it will fill 0xff into the top byte of the address, also adds comment to explain the fixing up. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-6-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../util/arm-spe-decoder/arm-spe-decoder.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index a5d7509d5daa..90d575cee1b9 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -24,7 +24,7 @@ static u64 arm_spe_calc_ip(int index, u64 payload) { - u64 ns, el; + u64 ns, el, val; /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || @@ -45,8 +45,22 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Clean tags */ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); - /* Fill highest byte if bits [48..55] is 0xff */ - if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL) + /* + * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet" + * defines the data virtual address payload format, the top byte + * (bits [63:56]) is assigned as top-byte tag; so we only can + * retrieve address value from bits [55:0]. + * + * According to Documentation/arm64/memory.rst, if detects the + * specific pattern in bits [55:52] of payload which falls in + * the kernel space, should fixup the top byte and this allows + * perf tool to parse DSO symbol for data address correctly. + * + * For this reason, if detects the bits [55:52] is 0xf, will + * fill 0xff into the top byte. + */ + val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload); + if ((val & 0xf0ULL) == 0xf0ULL) payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT; /* Data access physical address */ -- Gitee From b50c0786b12c8d585954f560a15a5d87ec687501 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:07 +0800 Subject: [PATCH 079/173] perf arm-spe: Refactor context packet handling [Upstream commit 6550149e801a32b1533ed86509af76319cb75eba] Minor refactoring to use macro for index mask. 
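Concretely, the mask extracts the low two bits of the header byte; a one-line sketch (the macro definition appears in the hunk below):

	packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]);	/* (buf[0] & GENMASK_ULL(1, 0)), i.e. buf[0] & 0x3 */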
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-7-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 44397d89c1c2..f45fbad7b854 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -136,7 +136,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_CONTEXT; - packet->index = buf[0] & 0x3; + packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]); return arm_spe_get_payload(buf, len, 0, packet); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index f97d6840be3a..9bc876bffd35 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -79,6 +79,9 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_EL2 2 #define SPE_ADDR_PKT_EL3 3 +/* Context packet header */ +#define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- Gitee From 8d395fb2d07c514c0bc0b096ea75fd306d6822ee Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:08 +0800 Subject: [PATCH 080/173] perf arm-spe: Add new function arm_spe_pkt_desc_counter() [Upstream commit c52cfe9872132407eef6d734014d6fd7790146f5] This patch moves out the counter packet parsing code from arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_counter(). 
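For reference, the counter packet index selects which latency the payload counts, summarized from the switch statement in the new function:

	/* ARM_SPE_COUNTER packet->index:
	 *   0 - total latency       -> printed as "TOT"
	 *   1 - issue latency       -> printed as "ISSUE"
	 *   2 - translation latency -> printed as "XLAT"
	 */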
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-8-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index f45fbad7b854..6240bd2f3f19 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -323,6 +323,33 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, return err; } +static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + const char *name = arm_spe_pkt_name(packet->type); + int err = 0; + + arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name, + (unsigned short)payload); + + switch (packet->index) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); + break; + default: + break; + } + + return err; +} + int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -415,21 +442,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, name, (unsigned long)payload, idx + 1); break; case ARM_SPE_COUNTER: - arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name, - (unsigned short)payload); - switch (idx) { - case 0: - arm_spe_pkt_out_string(&err, &buf, &blen, "TOT"); - break; - case 1: - arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE"); - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT"); - break; - default: - break; - } + err = arm_spe_pkt_desc_counter(packet, buf, buf_len); break; default: /* Unknown packet type */ -- Gitee From bb638fc7193bd5490a0715c3a6754452c713d6aa Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:09 +0800 Subject: [PATCH 081/173] perf arm-spe: Refactor counter packet handling [Upstream commit d158aa408f221756f99edb128ef35bfd4d3361d5] This patch defines macros for the counter packet header, and uses the macros to replace hard-coded values in the functions arm_spe_get_counter() and arm_spe_pkt_desc(). In the function arm_spe_get_counter(), it also adds a new line for better readability.
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-9-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++--- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 6240bd2f3f19..52f4339b1f0c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -334,13 +334,13 @@ static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet, (unsigned short)payload); switch (packet->index) { - case 0: + case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT"); break; - case 1: + case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE"); break; - case 2: + case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT: arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT"); break; default: diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 9bc876bffd35..7d8e34e35f05 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -82,6 +82,11 @@ struct arm_spe_pkt { /* Context packet header */ #define SPE_CTX_PKT_HDR_INDEX(h) ((h) & GENMASK_ULL(1, 0)) +/* Counter packet header */ +#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT 0x0 +#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 +#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- Gitee From 26514c09d95827e5f8ac0ece30226b3677f254ee Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:10 +0800 Subject: [PATCH 082/173] perf arm-spe: Add new function arm_spe_pkt_desc_event() [Upstream commit e66f6d75960220001ce94afe93c981826235c003] This patch moves out the event packet parsing from arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_event(). 
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-10-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 63 +++++++++++-------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 52f4339b1f0c..da6b9f76739c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -287,6 +287,42 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen, return ret; } +static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + int err = 0; + + arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); + if (payload & 0x20) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); + if (payload & 0x40) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); + if (payload & 0x80) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); + if (packet->index > 1) { + if (payload & 0x100) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); + if (payload & 0x200) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); + if (payload & 0x400) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); + } + + return err; +} + static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -367,32 +403,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name); break; case ARM_SPE_EVENTS: - arm_spe_pkt_out_string(&err, &buf, &blen, "EV"); - - if (payload & 0x1) - arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN"); - if (payload & 0x2) - arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED"); - if (payload & 0x4) - arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS"); - if (payload & 0x8) - arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL"); - if (payload & 0x10) - arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS"); - if (payload & 0x20) - arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL"); - if (payload & 0x40) - arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN"); - if (payload & 0x80) - arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED"); - if (idx > 1) { - if (payload & 0x100) - arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS"); - if (payload & 0x200) - arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL"); - if (payload & 0x400) - arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS"); - } + err = arm_spe_pkt_desc_event(packet, buf, buf_len); break; case ARM_SPE_OP_TYPE: switch (idx) { -- Gitee From 0f8c1949aabb90afb70ab0c6f615dd6dbbae2fdf Mon Sep 17 00:00:00 
2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:11 +0800 Subject: [PATCH 083/173] perf arm-spe: Refactor event type handling [Upstream commit 889d1a675fcfe734f83c459de023a6f0a91a7a0e] Move the enums of event types to arm-spe-pkt-decoder.h, thus function arm_spe_pkt_desc_event() can use them for bitmasks. Suggested-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-11-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 22 +++++++++---------- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 18 +++++++++++++++ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index da6b9f76739c..3f30b2937715 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -295,28 +295,28 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV"); - if (payload & 0x1) + if (payload & BIT(EV_EXCEPTION_GEN)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN"); - if (payload & 0x2) + if (payload & BIT(EV_RETIRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED"); - if (payload & 0x4) + if (payload & BIT(EV_L1D_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS"); - if (payload & 0x8) + if (payload & BIT(EV_L1D_REFILL)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL"); - if (payload & 0x10) + if (payload & BIT(EV_TLB_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS"); - if (payload & 0x20) + if (payload & BIT(EV_TLB_WALK)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL"); - if (payload & 0x40) + if (payload & BIT(EV_NOT_TAKEN)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); - if (payload & 0x80) + if (payload & BIT(EV_MISPRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); if (packet->index > 1) { - if (payload & 0x100) + if (payload & BIT(EV_LLC_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & 0x200) + if (payload & BIT(EV_LLC_MISS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & 0x400) + if (payload & BIT(EV_REMOTE_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); } diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 7d8e34e35f05..42ed4e61ede2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -87,6 +87,24 @@ struct arm_spe_pkt { #define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT 0x1 #define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT 0x2 +/* Event packet payload */ +enum arm_spe_events { + EV_EXCEPTION_GEN = 0, + EV_RETIRED = 1, + EV_L1D_ACCESS = 2, + EV_L1D_REFILL = 3, + EV_TLB_ACCESS = 4, + EV_TLB_WALK = 5, + EV_NOT_TAKEN = 6, + EV_MISPRED = 7, + EV_LLC_ACCESS = 8, + EV_LLC_MISS = 9, + EV_REMOTE_ACCESS = 10, + EV_ALIGNMENT = 11, + EV_PARTIAL_PREDICATE = 17, + EV_EMPTY_PREDICATE = 18, +}; + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int 
arm_spe_get_packet(const unsigned char *buf, size_t len, -- Gitee From b199be71a2ab205327e7d43481473e57c30136e1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:12 +0800 Subject: [PATCH 084/173] perf arm-spe: Remove size condition checking for events [Upstream commit 4d0f4ca273aa95bf592b8bad3c619b5766c8ecc7] In the Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.6 Events packet", it describes that an event bit is only valid with a specific payload size requirement. For example, the Last Level cache access event bit is defined as: E[8], byte 1 bit [0], when SZ == 0b01, when SZ == 0b10, or when SZ == 0b11. This requires the payload size to be at least 2 bytes: when byte 1 (counting from 0) is valid, E[8] (bit 0 in byte 1) can be used for the LLC access event type. For safety, the code first checks the payload size condition and only continues to parse the event type if the size requirement is met. Reviewing the function arm_spe_get_payload() shows it already uses a cast, so any bytes beyond the valid size have been set to zero. For this reason, we don't need to check the payload size afterwards when parsing events, thus this patch removes the payload size conditions. Suggested-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-12-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 3f30b2937715..88bcf7e5be76 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -311,14 +311,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN"); if (payload & BIT(EV_MISPRED)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED"); - if (packet->index > 1) { - if (payload & BIT(EV_LLC_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); - if (payload & BIT(EV_LLC_MISS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); - if (payload & BIT(EV_REMOTE_ACCESS)) - arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); - } + if (payload & BIT(EV_LLC_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS"); + if (payload & BIT(EV_LLC_MISS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); + if (payload & BIT(EV_REMOTE_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); return err; } -- Gitee From 336eef23edeab6af03d2d291dd9ec017f72641d1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:13 +0800 Subject: [PATCH 085/173] perf arm-spe: Add new function arm_spe_pkt_desc_op_type() [Upstream commit 7488ffc4d981e19feddfe36a619051bf6216c7a1] The operation type packet is complex and contains subclasses; the parsing flow causes deep indentation. For better readability, this patch introduces a new function arm_spe_pkt_desc_op_type() which is used for operation type parsing.
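For orientation, the class field that the new function switches on is the low two bits of the operation packet header; the class values below match the macros added later in this series:

	/* packet->index for ARM_SPE_OP_TYPE packets:
	 *   0 - other             (SPE_OP_PKT_HDR_CLASS_OTHER)
	 *   1 - load/store/atomic (SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC)
	 *   2 - branch/eret       (SPE_OP_PKT_HDR_CLASS_BR_ERET)
	 */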
Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-13-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 79 +++++++++++-------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 88bcf7e5be76..d6c060f119b4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -321,6 +321,50 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, return err; } +static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, + char *buf, size_t buf_len) +{ + u64 payload = packet->payload; + int err = 0; + + switch (packet->index) { + case 0: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + break; + case 1: + arm_spe_pkt_out_string(&err, &buf, &buf_len, + payload & 0x1 ? "ST" : "LD"); + + if (payload & 0x2) { + if (payload & 0x4) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); + if (payload & 0x8) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); + if (payload & 0x10) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); + } else if (payload & 0x4) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); + } + break; + case 2: + arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); + + if (payload & 0x1) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); + if (payload & 0x2) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); + + break; + default: + /* Unknown index */ + err = -1; + break; + } + + return err; +} + static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { @@ -404,40 +448,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, err = arm_spe_pkt_desc_event(packet, buf, buf_len); break; case ARM_SPE_OP_TYPE: - switch (idx) { - case 0: - arm_spe_pkt_out_string(&err, &buf, &blen, - payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); - break; - case 1: - arm_spe_pkt_out_string(&err, &buf, &blen, - payload & 0x1 ? 
"ST" : "LD"); - - if (payload & 0x2) { - if (payload & 0x4) - arm_spe_pkt_out_string(&err, &buf, &blen, " AT"); - if (payload & 0x8) - arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL"); - if (payload & 0x10) - arm_spe_pkt_out_string(&err, &buf, &blen, " AR"); - } else if (payload & 0x4) { - arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP"); - } - break; - case 2: - arm_spe_pkt_out_string(&err, &buf, &blen, "B"); - - if (payload & 0x1) - arm_spe_pkt_out_string(&err, &buf, &blen, " COND"); - if (payload & 0x2) - arm_spe_pkt_out_string(&err, &buf, &blen, " IND"); - - break; - default: - /* Unknown index */ - err = -1; - break; - } + err = arm_spe_pkt_desc_op_type(packet, buf, buf_len); break; case ARM_SPE_DATA_SOURCE: case ARM_SPE_TIMESTAMP: -- Gitee From 69f6df4773412be549987d015b624dcaf797135b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:14 +0800 Subject: [PATCH 086/173] perf arm-spe: Refactor operation packet handling [Upstream commit e771218f32f97c0940ae46c23e20d27f3d4c05e3] Defines macros for operation packet header and formats (support sub classes for 'other', 'branch', 'load and store', etc). Uses these macros for operation packet decoding and dumping. Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-14-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 26 ++++++++++--------- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 23 ++++++++++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index d6c060f119b4..1d1354a0eef4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -144,7 +144,7 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len, struct arm_spe_pkt *packet) { packet->type = ARM_SPE_OP_TYPE; - packet->index = buf[0] & 0x3; + packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]); return arm_spe_get_payload(buf, len, 0, packet); } @@ -328,31 +328,33 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, int err = 0; switch (packet->index) { - case 0: + case SPE_OP_PKT_HDR_CLASS_OTHER: arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & 0x1 ? "COND-SELECT" : "INSN-OTHER"); + payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); break; - case 1: + case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: arm_spe_pkt_out_string(&err, &buf, &buf_len, payload & 0x1 ? 
"ST" : "LD"); - if (payload & 0x2) { - if (payload & 0x4) + if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { + if (payload & SPE_OP_PKT_AT) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); - if (payload & 0x8) + if (payload & SPE_OP_PKT_EXCL) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); - if (payload & 0x10) + if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } else if (payload & 0x4) { + } else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) == + SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); } break; - case 2: + case SPE_OP_PKT_HDR_CLASS_BR_ERET: arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); - if (payload & 0x1) + if (payload & SPE_OP_PKT_COND) arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - if (payload & 0x2) + + if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 42ed4e61ede2..7032fc141ad4 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -105,6 +105,29 @@ enum arm_spe_events { EV_EMPTY_PREDICATE = 18, }; +/* Operation packet header */ +#define SPE_OP_PKT_HDR_CLASS(h) ((h) & GENMASK_ULL(1, 0)) +#define SPE_OP_PKT_HDR_CLASS_OTHER 0x0 +#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 +#define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 + +#define SPE_OP_PKT_COND BIT(0) + +#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 + +#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) + +#define SPE_OP_PKT_AR BIT(4) +#define SPE_OP_PKT_EXCL BIT(3) +#define SPE_OP_PKT_AT BIT(2) +#define SPE_OP_PKT_ST BIT(0) + +#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) + const char *arm_spe_pkt_name(enum arm_spe_pkt_type); int arm_spe_get_packet(const unsigned char *buf, size_t len, -- Gitee From b7f593fb0db10bd7ef434fa8d41cedd20e89a79e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:15 +0800 Subject: [PATCH 087/173] perf arm-spe: Add more sub classes for operation packet [Upstream commit 3d829724b16c5d2de42e6c9601c696c93a10bc61] For the operation type packet payload with load/store class, it misses to support these sub classes: - A load/store targeting the general-purpose registers; - A load/store targeting unspecified registers; - The ARMv8.4 nested virtualisation extension can redirect system register accesses to a memory page controlled by the hypervisor. The SPE profiling feature in newer implementations can tag those memory accesses accordingly. Add the bit pattern describing load/store sub classes, so that the perf tool can decode it properly. Inspired by Andre Przywara, refined the commit log and code for more clear description. 
Co-developed-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-15-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 1d1354a0eef4..84d661aab54f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -343,9 +343,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) == - SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) { + } + + switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { + case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); + break; + case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: + arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); + break; + default: + break; } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: -- Gitee From ec5f4f94c3c0dcc4f91f7b81b8154b3bd85b29ee Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 31 Dec 2021 13:32:16 +0800 Subject: [PATCH 088/173] perf arm_spe: Decode memory tagging properties [Upstream commit 3601e605501df289db149785e1e6a8d16e557d31] When SPE records a physical address, it can additionally tag the event with information from the Memory Tagging architecture extension. Decode the two additional fields in the SPE event payload. 
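As a short sketch of the decode (bit positions follow the SPE_ADDR_PKT_GET_CH and SPE_ADDR_PKT_GET_PAT macros added in the hunk below; the field meanings are our reading of the SPE physical address packet layout):

	ch  = !!SPE_ADDR_PKT_GET_CH(payload);	/* bit 62: checked access flag */
	pat = SPE_ADDR_PKT_GET_PAT(payload);	/* bits [59:56]: physical address tag */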
[leoy: Refined patch to use predefined macros] Signed-off-by: Andre Przywara Signed-off-by: Leo Yan Reviewed-by: Dave Martin Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Link: https://lore.kernel.org/r/20201119152441.6972-16-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 84d661aab54f..57c01ce27915 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -385,6 +385,7 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, char *buf, size_t buf_len) { int ns, el, idx = packet->index; + int ch, pat; u64 payload = packet->payload; int err = 0; @@ -404,9 +405,12 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, break; case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS: ns = !!SPE_ADDR_PKT_GET_NS(payload); + ch = !!SPE_ADDR_PKT_GET_CH(payload); + pat = SPE_ADDR_PKT_GET_PAT(payload); payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, - "PA 0x%llx ns=%d", payload, ns); + "PA 0x%llx ns=%d ch=%d pat=%x", + payload, ns, ch, pat); break; default: /* Unknown index */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 7032fc141ad4..1ad14885c2a1 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -73,6 +73,8 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_GET_NS(v) (((v) & BIT_ULL(63)) >> 63) #define SPE_ADDR_PKT_GET_EL(v) (((v) & GENMASK_ULL(62, 61)) >> 61) +#define SPE_ADDR_PKT_GET_CH(v) (((v) & BIT_ULL(62)) >> 62) +#define SPE_ADDR_PKT_GET_PAT(v) (((v) & GENMASK_ULL(59, 56)) >> 56) #define SPE_ADDR_PKT_EL0 0 #define SPE_ADDR_PKT_EL1 1 -- Gitee From 5dece6d904d5a52348d491e71929a5697c395e13 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Fri, 31 Dec 2021 13:32:17 +0800 Subject: [PATCH 089/173] perf arm-spe: Add support for ARMv8.3-SPE [Upstream commit 05e91e7fe26c6fb116fa16f43c1eed78020f9463] This patch adds support for the Armv8.3 extension for SPE: it adds the alignment field in the Events packet and supports the Scalable Vector Extension (SVE) for the Operation packet and Events packet with two additions: - The vector length for SVE operations in the Operation Type packet; - The incomplete predicate and empty predicate fields in the Events packet.
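To make the SVE vector length encoding concrete, a worked example based on the EVL formula documented with the SPE_OP_PKG_SVE_EVL macro below (EVL = 32 << bits [6:4] of payload byte 0):

	/* bits [6:4] = 0b000 -> EVLEN 32
	 * bits [6:4] = 0b010 -> EVLEN 128
	 * bits [6:4] = 0b100 -> EVLEN 512
	 */
	int evl = SPE_OP_PKG_SVE_EVL(payload);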
Signed-off-by: Wei Li Signed-off-by: Leo Yan Reviewed-by: Andre Przywara Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Dave Martin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20201119152441.6972-17-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../arm-spe-decoder/arm-spe-pkt-decoder.c | 36 +++++++++++++++++-- .../arm-spe-decoder/arm-spe-pkt-decoder.h | 16 +++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 57c01ce27915..f3ac9d40cebf 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -317,6 +317,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL"); if (payload & BIT(EV_REMOTE_ACCESS)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); + if (payload & BIT(EV_ALIGNMENT)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); + if (payload & BIT(EV_PARTIAL_PREDICATE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); + if (payload & BIT(EV_EMPTY_PREDICATE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED"); return err; } @@ -329,8 +335,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, switch (packet->index) { case SPE_OP_PKT_HDR_CLASS_OTHER: - arm_spe_pkt_out_string(&err, &buf, &buf_len, - payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); + if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); + + /* SVE effective vector length */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", + SPE_OP_PKG_SVE_EVL(payload)); + + if (payload & SPE_OP_PKT_SVE_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); + if (payload & SPE_OP_PKT_SVE_PRED) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); + } else { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); + arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", + payload & SPE_OP_PKT_COND ? 
+ "COND-SELECT" : "INSN-OTHER"); + } break; case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: arm_spe_pkt_out_string(&err, &buf, &buf_len, @@ -361,6 +382,17 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, default: break; } + + if (SPE_OP_PKT_IS_LDST_SVE(payload)) { + /* SVE effective vector length */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", + SPE_OP_PKG_SVE_EVL(payload)); + + if (payload & SPE_OP_PKT_SVE_PRED) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); + if (payload & SPE_OP_PKT_SVE_SG) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); + } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: arm_spe_pkt_out_string(&err, &buf, &buf_len, "B"); diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 1ad14885c2a1..9b970e7bf1e2 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -113,6 +113,8 @@ enum arm_spe_events { #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 +#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) + #define SPE_OP_PKT_COND BIT(0) #define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) @@ -128,6 +130,20 @@ enum arm_spe_events { #define SPE_OP_PKT_AT BIT(2) #define SPE_OP_PKT_ST BIT(0) +#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) + +#define SPE_OP_PKT_SVE_SG BIT(7) +/* + * SVE effective vector length (EVL) is stored in byte 0 bits [6:4]; + * the length is rounded up to a power of two and use 32 as one step, + * so EVL calculation is: + * + * 32 * (2 ^ bits [6:4]) = 32 << (bits [6:4]) + */ +#define SPE_OP_PKG_SVE_EVL(v) (32 << (((v) & GENMASK_ULL(6, 4)) >> 4)) +#define SPE_OP_PKT_SVE_PRED BIT(2) +#define SPE_OP_PKT_SVE_FP BIT(1) + #define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) const char *arm_spe_pkt_name(enum arm_spe_pkt_type); -- Gitee From 8914c24aedac0558720af0a2f6a32f7e6da5b343 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:18 +0800 Subject: [PATCH 090/173] perf c2c: Rename for shared cache line stats [Upstream commit 1834436e340ce0ec00e8114f61009246a5b36fe9] For shared cache line statistics, 'perf c2c' relies on HITM. We can use more general naming rather than only binding to HITM, so replace "hitm_stats" with "shared_clines_stats" in structure perf_c2c, and rename function resort_hitm_cb() to resort_shared_cl_cb(). 
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20210114154646.209024-2-leo.yan@linaro.org Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f5af579378dc..30ce1611063a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -96,8 +96,8 @@ struct perf_c2c { bool stats_only; bool symbol_full; - /* HITM shared clines stats */ - struct c2c_stats hitm_stats; + /* Shared cache line stats */ + struct c2c_stats shared_clines_stats; int shared_clines; int display; @@ -2005,7 +2005,7 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; - bool display = he__display(he, &c2c.hitm_stats); + bool display = he__display(he, &c2c.shared_clines_stats); c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; @@ -2092,14 +2092,14 @@ static int setup_nodes(struct perf_session *session) #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) -static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) +static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused) { struct c2c_hist_entry *c2c_he; c2c_he = container_of(he, struct c2c_hist_entry, he); if (HAS_HITMS(c2c_he)) { c2c.shared_clines++; - c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats); + c2c_add_stats(&c2c.shared_clines_stats, &c2c_he->stats); } return 0; @@ -2170,7 +2170,7 @@ static void print_c2c__display_stats(FILE *out) static void print_shared_cacheline_info(FILE *out) { - struct c2c_stats *stats = &c2c.hitm_stats; + struct c2c_stats *stats = &c2c.shared_clines_stats; int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm; fprintf(out, "=================================================\n"); @@ -2865,7 +2865,7 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_shared_cl_cb); hists__iterate_cb(&c2c.hists.hists, resort_cl_cb); ui_progress__finish(); -- Gitee From bdd588dcfcddf04ecd1b01b951be855fa44bfbb6 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:19 +0800 Subject: [PATCH 091/173] perf c2c: Refactor hist entry validation [Upstream commit 2290e1d6193bc7c760a47a4c2208a87fd8dab202] This patch has no functionality changes but refactors hist entry validation for cache line resorting. It renames function "valid_hitm_or_store()" to "is_valid_hist_entry()", changes return type from integer type to bool type. In the function, it uses switch-case instead of ternary operators, which is easier to extend for more display types. 
Signed-off-by: Leo Yan Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 30ce1611063a..2a4be59e01c6 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1932,16 +1932,32 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) return he->filtered == 0; } -static inline int valid_hitm_or_store(struct hist_entry *he) +static inline bool is_valid_hist_entry(struct hist_entry *he) { struct c2c_hist_entry *c2c_he; - bool has_hitm; + bool has_record = false; c2c_he = container_of(he, struct c2c_hist_entry, he); - has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm : - c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm : - c2c_he->stats.rmt_hitm; - return has_hitm || c2c_he->stats.store; + + /* It's a valid entry if contains stores */ + if (c2c_he->stats.store) + return true; + + switch (c2c.display) { + case DISPLAY_LCL: + has_record = !!c2c_he->stats.lcl_hitm; + break; + case DISPLAY_RMT: + has_record = !!c2c_he->stats.rmt_hitm; + break; + case DISPLAY_TOT: + has_record = !!c2c_he->stats.tot_hitm; + break; + default: + break; + } + + return has_record; } static void set_node_width(struct c2c_hist_entry *c2c_he, int len) @@ -1995,7 +2011,7 @@ static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) calc_width(c2c_he); - if (!valid_hitm_or_store(he)) + if (!is_valid_hist_entry(he)) he->filtered = HIST_FILTER__C2C; return 0; -- Gitee From d30abee5adb318a015a74ef077c1474c9cef9a53 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:20 +0800 Subject: [PATCH 092/173] perf c2c: Refactor display filter [Upstream commit 69a95bfdf95b23ad9bd3d240cef92408823656f9] When sorting on the respective metrics (lcl_hitm, rmt_hitm, tot_hitm), the FILTER_HITM macro is used to filter out the cache line entries if its overhead is less than 1%. This patch introduces a static function filter_display() to replace that macro and refines its parameters with a more flexible way, rather than passing field name, it changes to pass the cache line's statistic and sum value. 
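A worked example with hypothetical counts, using the DISPLAY_LINE_LIMIT
value from the code below: a cache line with 5 HITMs out of a total of
10000 gives a ratio of 0.0005, which is under the 0.001 limit, so the
entry is filtered out:

	#include <stdbool.h>
	#include <stdio.h>

	#define DISPLAY_LINE_LIMIT 0.001

	/* mirrors the test performed by filter_display() below */
	static bool is_filtered(unsigned int val, unsigned int sum)
	{
		return sum == 0 || ((double)val / sum) < DISPLAY_LINE_LIMIT;
	}

	int main(void)
	{
		/* hypothetical counts: 5 HITMs of 10000 total */
		printf("%s\n", is_filtered(5, 10000) ? "filtered" : "shown");
		return 0;
	}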
Signed-off-by: Leo Yan Acked-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2a4be59e01c6..571bac37166c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1895,40 +1895,40 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, #define DISPLAY_LINE_LIMIT 0.001 +static u8 filter_display(u32 val, u32 sum) +{ + if (sum == 0 || ((double)val / sum) < DISPLAY_LINE_LIMIT) + return HIST_FILTER__C2C; + + return 0; +} + static bool he__display(struct hist_entry *he, struct c2c_stats *stats) { struct c2c_hist_entry *c2c_he; - double ld_dist; if (c2c.show_all) return true; c2c_he = container_of(he, struct c2c_hist_entry, he); -#define FILTER_HITM(__h) \ - if (stats->__h) { \ - ld_dist = ((double)c2c_he->stats.__h / stats->__h); \ - if (ld_dist < DISPLAY_LINE_LIMIT) \ - he->filtered = HIST_FILTER__C2C; \ - } else { \ - he->filtered = HIST_FILTER__C2C; \ - } - switch (c2c.display) { case DISPLAY_LCL: - FILTER_HITM(lcl_hitm); + he->filtered = filter_display(c2c_he->stats.lcl_hitm, + stats->lcl_hitm); break; case DISPLAY_RMT: - FILTER_HITM(rmt_hitm); + he->filtered = filter_display(c2c_he->stats.rmt_hitm, + stats->rmt_hitm); break; case DISPLAY_TOT: - FILTER_HITM(tot_hitm); + he->filtered = filter_display(c2c_he->stats.tot_hitm, + stats->tot_hitm); + break; default: break; }; -#undef FILTER_HITM - return he->filtered == 0; } -- Gitee From 1ecd2768f4a05419e3d36f601b7139deaaf42c34 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:21 +0800 Subject: [PATCH 093/173] perf c2c: Fix argument type for percent() [Upstream commit 111c141591178881314ba1b50d550d31cba34c1a] For percent() its arguments are defined as integers; this is not consistent with its consumers which pass u32 arguments. Thus this patch makes argument type as u32 for percent(). Signed-off-by: Leo Yan Acked-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Ingo Molnar Cc: Joe Mario Cc: Joe Perches Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210114154646.209024-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/builtin-c2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 571bac37166c..4cabbbff7833 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -911,7 +911,7 @@ static struct c2c_stats *total_stats(struct hist_entry *he) return &hists->stats; } -static double percent(int st, int tot) +static double percent(u32 st, u32 tot) { return tot ? 100. * (double) st / (double) tot : 0; } -- Gitee From 0be1e240765e1d7f6859796b2d1d75e96df7ba17 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:22 +0800 Subject: [PATCH 094/173] perf c2c: Refactor node display [Upstream commit f3d0a551db13b7cdf8addbe265b4fd7026944d24] The macro DISPLAY_HITM() is used to calculate HITM percentage introduced by every node and it's shown for the node info. 
This patch introduces the static function display_metrics() to replace
the macro, and the parameters are refined: rather than passing a field
name, the metric's statistic and sum value are passed in directly.

Signed-off-by: Leo Yan
Acked-by: Namhyung Kim
Acked-by: Jiri Olsa
Cc: Alexander Shishkin
Cc: Arnaldo Carvalho de Melo
Cc: David Ahern
Cc: Don Zickus
Cc: Ingo Molnar
Cc: Joe Mario
Cc: Joe Perches
Cc: Mark Rutland
Cc: Peter Zijlstra
Link: https://lore.kernel.org/r/20210114154646.209024-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/builtin-c2c.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 4cabbbff7833..f1280115f039 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -1083,6 +1083,19 @@ empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
 	return 0;
 }
 
+static int display_metrics(struct perf_hpp *hpp, u32 val, u32 sum)
+{
+	int ret;
+
+	if (sum != 0)
+		ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",
+				percent(val, sum));
+	else
+		ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");
+
+	return ret;
+}
+
 static int
 node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
 	   struct hist_entry *he)
@@ -1126,29 +1139,23 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
 		ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
 		advance_hpp(hpp, ret);
 
-	#define DISPLAY_HITM(__h)					\
-		if (c2c_he->stats.__h> 0) {				\
-			ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ",	\
-					percent(stats->__h, c2c_he->stats.__h));\
-		} else {						\
-			ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a");	\
-		}
-
 		switch (c2c.display) {
 		case DISPLAY_RMT:
-			DISPLAY_HITM(rmt_hitm);
+			ret = display_metrics(hpp, stats->rmt_hitm,
+					      c2c_he->stats.rmt_hitm);
 			break;
 		case DISPLAY_LCL:
-			DISPLAY_HITM(lcl_hitm);
+			ret = display_metrics(hpp, stats->lcl_hitm,
+					      c2c_he->stats.lcl_hitm);
 			break;
 		case DISPLAY_TOT:
-			DISPLAY_HITM(tot_hitm);
+			ret = display_metrics(hpp, stats->tot_hitm,
+					      c2c_he->stats.tot_hitm);
+			break;
 		default:
 			break;
 		}
 
-	#undef DISPLAY_HITM
-
 		advance_hpp(hpp, ret);
 
 		if (c2c_he->stats.store > 0) {
--
Gitee
From e772072b449d47e2a9d381efe289f02fd834b7e3 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:24 +0800
Subject: [PATCH 095/173] perf arm-spe: Enable sample type PERF_SAMPLE_DATA_SRC

[Upstream commit 845d3a65c3352fc54eaf936259cd87e40a4b0fcf]

This patch enables the sample type PERF_SAMPLE_DATA_SRC for Arm SPE in
the perf data; when outputting the tracing data, this tells tools that
the memory events contain data source information.
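For reference, a hedged sketch (not taken from this patch) of how a
consumer can decode the data source value once PERF_SAMPLE_DATA_SRC is
set; the union and the PERF_MEM_* flags come from the perf_event UAPI,
the sample value passed in is hypothetical:

	#include <stdio.h>
	#include <linux/perf_event.h>

	/* sketch: decode a sample's data_src the way perf mem/c2c consume it */
	static void show_data_src(__u64 val)
	{
		union perf_mem_data_src dsrc = { .val = val };

		if (dsrc.mem_op & PERF_MEM_OP_LOAD)
			printf("load, ");
		else if (dsrc.mem_op & PERF_MEM_OP_STORE)
			printf("store, ");

		printf("mem level mask 0x%x\n", (unsigned int)dsrc.mem_lvl);
	}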
Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Al Grant Cc: Andre Przywara Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Cc: Will Deacon Link: https://lore.kernel.org/r/20210211133856.2137-1-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 8901a1656a41..b134516e890b 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -803,7 +803,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.type = PERF_TYPE_HARDWARE; attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | - PERF_SAMPLE_PERIOD; + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC; if (spe->timeless_decoding) attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; else -- Gitee From 6b34cae1d43868c017d757947dfaddfc19afb9a7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:25 +0800 Subject: [PATCH 096/173] perf arm-spe: Store memory address in packet [Upstream commit 265cfb9586d34afd705938927e5da096f4f025a4] This patch is to store virtual and physical memory addresses in packet, which will be used for memory samples. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Al Grant Cc: Andre Przywara Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Cc: Will Deacon Link: https://lore.kernel.org/r/20210211133856.2137-2-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 4 ++++ tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 90d575cee1b9..7aac3048b090 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -172,6 +172,10 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.from_ip = ip; else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH) decoder->record.to_ip = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) + decoder->record.virt_addr = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) + decoder->record.phys_addr = ip; break; case ARM_SPE_COUNTER: break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 24727b8ca7ff..7b845001afe7 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -30,6 +30,8 @@ struct arm_spe_record { u64 from_ip; u64 to_ip; u64 timestamp; + u64 virt_addr; + u64 phys_addr; }; struct arm_spe_insn; -- Gitee From ed03dd817b8e7ac83105bc54bfaa43597bf66acd Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:26 +0800 Subject: [PATCH 097/173] perf arm-spe: Store operation type in packet [Upstream commit 97ae666ae03606e254ffb478673d4e311a35fd83] This patch is to store operation type in packet structure. 
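As an aside, the load/store classification comes from bit 0 of the
LD/ST/ATOMIC operation payload, as the decoder change below shows; a
minimal standalone sketch (the payload value is hypothetical):

	/* mirrors the new enum arm_spe_op_type and the decoder's test */
	enum op { OP_LD = 1 << 0, OP_ST = 1 << 1 };

	static enum op classify_ldst(unsigned long long payload)
	{
		/* bit 0 of the LD/ST/ATOMIC payload: 1 = store, 0 = load */
		return (payload & 0x1) ? OP_ST : OP_LD;
	}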
Signed-off-by: Leo Yan
Reviewed-by: James Clark
Tested-by: James Clark
Cc: Adrian Hunter
Cc: Alexander Shishkin
Cc: Al Grant
Cc: Andre Przywara
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: John Garry
Cc: Mark Rutland
Cc: Mathieu Poirier
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Wei Li
Cc: Will Deacon
Signed-off-by: James Clark
Link: https://lore.kernel.org/r/20210211133856.2137-3-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 6 ++++++
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 7aac3048b090..32fe41835fa6 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -182,6 +182,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
 		case ARM_SPE_CONTEXT:
 			break;
 		case ARM_SPE_OP_TYPE:
+			if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
+				if (payload & 0x1)
+					decoder->record.op = ARM_SPE_ST;
+				else
+					decoder->record.op = ARM_SPE_LD;
+			}
 			break;
 		case ARM_SPE_EVENTS:
 			if (payload & BIT(EV_L1D_REFILL))
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 7b845001afe7..59bdb7309674 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -24,9 +24,15 @@ enum arm_spe_sample_type {
 	ARM_SPE_REMOTE_ACCESS	= 1 << 7,
 };
 
+enum arm_spe_op_type {
+	ARM_SPE_LD = 1 << 0,
+	ARM_SPE_ST = 1 << 1,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
+	u32 op;
 	u64 from_ip;
 	u64 to_ip;
 	u64 timestamp;
--
Gitee
From 539a7be08374c3cc873c8ac0b6d72cd2c3821c77 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:27 +0800
Subject: [PATCH 098/173] perf arm-spe: Fill address info for samples

[Upstream commit 54f7815efef7fad935fdf73dfd8b3116568f2f35]

To properly handle memory and branch samples, this patch splits sample
generation into two functions: arm_spe__synth_mem_sample() synthesizes
memory and TLB samples, while arm_spe__synth_branch_sample() synthesizes
branch samples.

The Arm SPE backend decoder has passed the virtual and physical
addresses through packets; this address info is stored into the
synthesized samples in the function arm_spe__synth_mem_sample().
Committer notes: Fixed this: 36 46.77 fedora:27 : FAIL clang version 5.0.2 (tags/RELEASE_502/final) util/arm-spe.c:269:34: error: missing field 'pid' initializer [-Werror,-Wmissing-field-initializers] struct perf_sample sample = { 0 }; ^ util/arm-spe.c:288:34: error: missing field 'pid' initializer [-Werror,-Wmissing-field-initializers] struct perf_sample sample = { 0 }; By using = { .ip = 0, }; Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Al Grant Cc: Andre Przywara Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Cc: Will Deacon Signed-off-by: James Clark Link: https://lore.kernel.org/r/20210211133856.2137-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 50 +++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index b134516e890b..de70a8f3ff36 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -235,7 +235,6 @@ static void arm_spe_prep_sample(struct arm_spe *spe, sample->cpumode = arm_spe_cpumode(spe, sample->ip); sample->pid = speq->pid; sample->tid = speq->tid; - sample->addr = record->to_ip; sample->period = 1; sample->cpu = speq->cpu; @@ -259,11 +258,29 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, return ret; } -static int -arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, - u64 spe_events_id) +static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, + u64 spe_events_id) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + arm_spe_prep_sample(spe, speq, event, &sample); + + sample.id = spe_events_id; + sample.stream_id = spe_events_id; + sample.addr = record->virt_addr; + sample.phys_addr = record->phys_addr; + + return arm_spe_deliver_synth_event(spe, speq, event, &sample); +} + +static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, + u64 spe_events_id) { struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; struct perf_sample sample = { .ip = 0, }; @@ -271,6 +288,7 @@ arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; + sample.addr = record->to_ip; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -283,15 +301,13 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->l1d_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id); if (err) return err; } if (record->type & ARM_SPE_L1D_ACCESS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->l1d_access_id); + err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id); if (err) return err; } @@ -299,15 +315,13 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_llc) { if (record->type & ARM_SPE_LLC_MISS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->llc_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id); if (err) return err; } if (record->type & ARM_SPE_LLC_ACCESS) { - err = arm_spe_synth_spe_events_sample( - speq, spe->llc_access_id); + err = arm_spe__synth_mem_sample(speq, 
spe->llc_access_id);
 			if (err)
 				return err;
 		}
@@ -315,31 +329,27 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 
 	if (spe->sample_tlb) {
 		if (record->type & ARM_SPE_TLB_MISS) {
-			err = arm_spe_synth_spe_events_sample(
-					speq, spe->tlb_miss_id);
+			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id);
 			if (err)
 				return err;
 		}
 
 		if (record->type & ARM_SPE_TLB_ACCESS) {
-			err = arm_spe_synth_spe_events_sample(
-					speq, spe->tlb_access_id);
+			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id);
 			if (err)
 				return err;
 		}
 	}
 
 	if (spe->sample_branch &&
 	    (record->type & ARM_SPE_BRANCH_MISS)) {
-		err = arm_spe_synth_spe_events_sample(speq,
-						      spe->branch_miss_id);
+		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
 		if (err)
 			return err;
 	}
 
 	if (spe->sample_remote_access &&
 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
-		err = arm_spe_synth_spe_events_sample(speq,
-						      spe->remote_access_id);
+		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id);
 		if (err)
 			return err;
 	}
--
Gitee
From 45be976eca1972c7e960230cf2aa95896b92af5a Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:28 +0800
Subject: [PATCH 099/173] perf arm-spe: Synthesize memory event

[Upstream commit e55ed3423c1bb29f97062f42ba3a94bbff5ab6a5]

The memory event delivers two benefits:

- The first benefit is that the memory event gives a global view of
  memory accesses: rather than organizing events in a scattered mode
  (e.g. using separate events for the L1 cache, last level cache, etc.),
  which can only display an event for a single memory type, memory
  events include all memory accesses, so data accesses across memory
  levels can be displayed in the same view;

- The second benefit is that sample generation can introduce a big
  overhead and perf reporting can take a long time; by specifying the
  itrace option '--itrace=M' we can filter out other events and only
  output memory events, which significantly reduces the overhead caused
  by generating samples.

This patch enables the memory event for Arm SPE.
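As a usage sketch (the PMU event name here is illustrative and can
differ per system), the synthesis can then be limited to memory events:

	# record an SPE trace, then report only synthesized memory events
	perf record -e arm_spe_0// -- ./workload
	perf report --itrace=M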
Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Al Grant Cc: Andre Przywara Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Cc: Will Deacon Signed-off-by: James Clark Link: https://lore.kernel.org/r/20210211133856.2137-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index de70a8f3ff36..0ae294a58402 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -53,6 +53,7 @@ struct arm_spe { u8 sample_tlb; u8 sample_branch; u8 sample_remote_access; + u8 sample_memory; u64 l1d_miss_id; u64 l1d_access_id; @@ -62,6 +63,7 @@ struct arm_spe { u64 tlb_access_id; u64 branch_miss_id; u64 remote_access_id; + u64 memory_id; u64 kernel_start; @@ -293,6 +295,18 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, return arm_spe_deliver_synth_event(spe, speq, event, &sample); } +#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \ + ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \ + ARM_SPE_REMOTE_ACCESS) + +static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) +{ + if (type & SPE_MEM_TYPE) + return true; + + return false; +} + static int arm_spe_sample(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; @@ -354,6 +368,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } + if (spe->sample_memory && arm_spe__is_memory_event(record->type)) { + err = arm_spe__synth_mem_sample(speq, spe->memory_id); + if (err) + return err; + } + return 0; } @@ -917,6 +937,16 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; } + if (spe->synth_opts.mem) { + spe->sample_memory = true; + + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->memory_id = id; + arm_spe_set_event_name(evlist, id, "memory"); + } + return 0; } -- Gitee From 208ef62566c062773192cbf155af8876419f3d40 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:29 +0800 Subject: [PATCH 100/173] perf arm-spe: Set sample's data source field [Upstream commit a89dbc9b988f3ba8700df3c58614744de0c5043f] The sample structure contains the field 'data_src' which is used to tell the data operation attributions, e.g. operation type is loading or storing, cache level, it's snooping or remote accessing, etc. At the end, the 'data_src' will be parsed by perf mem/c2c tools to display human readable strings. This patch is to fill the 'data_src' field in the synthesized samples base on different types. Currently perf tool can display statistics for L1/L2/L3 caches but it doesn't support the 'last level cache'. To fit to current implementation, 'data_src' field uses L3 cache for last level cache. Before this commit, perf mem report looks like this: # Samples: 75K of event 'l1d-miss' # Total weight : 75951 # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked # # Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data Object Snoop TLB access # ........ ....... ............ ............. ...................... ............. ...................... ........... ..... .......... # 81.56% 61945 0 N/A [.] 0x00000000000009d8 serial_c [.] 0000000000000000 [unknown] N/A N/A 18.44% 14003 0 N/A [.] 
0x0000000000000828 serial_c [.] 0000000000000000 [unknown] N/A N/A Now on a system with Arm SPE, addresses and access types are displayed: # Samples: 75K of event 'l1d-miss' # Total weight : 75951 # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked # # Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data Object Snoop TLB access # ........ ....... ............ ............. ...................... ............. ...................... ........... ..... .......... # 0.43% 324 0 L1 miss [.] 0x00000000000009d8 serial_c [.] 0x0000ffff80794e00 anon N/A Walker hit 0.42% 322 0 L1 miss [.] 0x00000000000009d8 serial_c [.] 0x0000ffff80794580 anon N/A Walker hit Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Al Grant Cc: Andre Przywara Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wei Li Cc: Will Deacon Signed-off-by: James Clark Link: https://lore.kernel.org/r/20210211133856.2137-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 69 ++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 0ae294a58402..2539d4baec44 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -261,7 +261,7 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, } static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, - u64 spe_events_id) + u64 spe_events_id, u64 data_src) { struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; @@ -274,6 +274,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, sample.stream_id = spe_events_id; sample.addr = record->virt_addr; sample.phys_addr = record->phys_addr; + sample.data_src = data_src; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -307,21 +308,66 @@ static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) return false; } +static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) +{ + union perf_mem_data_src data_src = { 0 }; + + if (record->op == ARM_SPE_LD) + data_src.mem_op = PERF_MEM_OP_LOAD; + else + data_src.mem_op = PERF_MEM_OP_STORE; + + if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { + data_src.mem_lvl = PERF_MEM_LVL_L3; + + if (record->type & ARM_SPE_LLC_MISS) + data_src.mem_lvl |= PERF_MEM_LVL_MISS; + else + data_src.mem_lvl |= PERF_MEM_LVL_HIT; + } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { + data_src.mem_lvl = PERF_MEM_LVL_L1; + + if (record->type & ARM_SPE_L1D_MISS) + data_src.mem_lvl |= PERF_MEM_LVL_MISS; + else + data_src.mem_lvl |= PERF_MEM_LVL_HIT; + } + + if (record->type & ARM_SPE_REMOTE_ACCESS) + data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1; + + if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { + data_src.mem_dtlb = PERF_MEM_TLB_WK; + + if (record->type & ARM_SPE_TLB_MISS) + data_src.mem_dtlb |= PERF_MEM_TLB_MISS; + else + data_src.mem_dtlb |= PERF_MEM_TLB_HIT; + } + + return data_src.val; +} + static int arm_spe_sample(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; struct arm_spe *spe = speq->spe; + u64 data_src; int err; + data_src = arm_spe__synth_data_source(record); + if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { - err = arm_spe__synth_mem_sample(speq, 
spe->l1d_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id, + data_src); if (err) return err; } if (record->type & ARM_SPE_L1D_ACCESS) { - err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id); + err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id, + data_src); if (err) return err; } @@ -329,13 +375,15 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_llc) { if (record->type & ARM_SPE_LLC_MISS) { - err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id, + data_src); if (err) return err; } if (record->type & ARM_SPE_LLC_ACCESS) { - err = arm_spe__synth_mem_sample(speq, spe->llc_access_id); + err = arm_spe__synth_mem_sample(speq, spe->llc_access_id, + data_src); if (err) return err; } @@ -343,13 +391,15 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_tlb) { if (record->type & ARM_SPE_TLB_MISS) { - err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id); + err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id, + data_src); if (err) return err; } if (record->type & ARM_SPE_TLB_ACCESS) { - err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id); + err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id, + data_src); if (err) return err; } @@ -363,13 +413,14 @@ static int arm_spe_sample(struct arm_spe_queue *speq) if (spe->sample_remote_access && (record->type & ARM_SPE_REMOTE_ACCESS)) { - err = arm_spe__synth_mem_sample(speq, spe->remote_access_id); + err = arm_spe__synth_mem_sample(speq, spe->remote_access_id, + data_src); if (err) return err; } if (spe->sample_memory && arm_spe__is_memory_event(record->type)) { - err = arm_spe__synth_mem_sample(speq, spe->memory_id); + err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; } -- Gitee From 1e5b6d37976a248a998e43fed048da50aad8b91f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 31 Dec 2021 13:32:30 +0800 Subject: [PATCH 101/173] perf arm-spe: Avoid potential buffer overrun [Upstream commit 92f1e8adf7db2ef9b90e5662182810c0cf8ac22e] SPE extended headers are > 1 byte so ensure the buffer contains at least this before reading. This issue was detected by fuzzing. 
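To sketch the failure mode (the byte value is hypothetical): a trace
buffer that ends right after a lone extended-format header byte used to
make the decoder read one byte past the end:

	#include <stddef.h>

	/* hypothetical: buffer ends immediately after a header0 whose bits
	 * flag the 16-bit extended format */
	static const unsigned char buf[] = { 0x20 };
	static const size_t len = sizeof(buf);	/* == 1 */

	/*
	 * Before this fix the decoder went on to read buf[1], one byte past
	 * the end; with the new check it returns ARM_SPE_BAD_PACKET instead.
	 */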
Signed-off-by: Ian Rogers
Cc: Alexander Shishkin
Cc: Andre Przywara
Cc: Dave Martin
Cc: Jiri Olsa
Cc: Leo Yan
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Will Deacon
Link: http://lore.kernel.org/lkml/20210407153955.317215-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index f3ac9d40cebf..2e5eff4f8f03 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -210,8 +210,10 @@ static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
 
 	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) {
 		/* 16-bit extended format header */
-		ext_hdr = 1;
+		if (len == 1)
+			return ARM_SPE_BAD_PACKET;
 
+		ext_hdr = 1;
 		hdr = buf[1];
 		if (hdr == SPE_HEADER1_ALIGNMENT)
 			return arm_spe_get_alignment(buf, len, packet);
--
Gitee
From 674b473d2baad67ce1def920fc82b58c067a9ed3 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Fri, 31 Dec 2021 13:32:31 +0800
Subject: [PATCH 102/173] perf arm-spe: Correct sample flags for SPE event

[Upstream commit 2f021954952f105e32383103cac06a095d9a1db6]

Currently the sample flags CPU, TIME and TID are hard-coded for the SPE
event, which is pointless.

The CPU is useful for sampling only in the per-cpu mmap case, where it
indicates which CPU the AUX trace is associated with.

The TIME sample is not needed for the AUX event, since the time for the
AUX event is not really used and is a different thing from the
timestamp in the Arm SPE trace, whose tracing is controlled by Arm
SPE's config bit.

The TID sample is not useful for the AUX event.

This patch corrects the sample flags for the SPE event: it only sets
the CPU sample bit, and only for the per-cpu mmap case.

Signed-off-by: Leo Yan
Reviewed-by: James Clark
Tested-by: James Clark
Cc: Alexander Shishkin
Cc: Al Grant
Cc: Ingo Molnar
Cc: Jiri Olsa
Cc: John Garry
Cc: Mark Rutland
Cc: Mathieu Poirier
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Will Deacon
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20210519041546.1574961-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/arch/arm64/util/arm-spe.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index e3593063b3d1..35e7c7e774c2 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -68,6 +68,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 		container_of(itr, struct arm_spe_recording, itr);
 	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
 	struct evsel *evsel, *arm_spe_evsel = NULL;
+	struct perf_cpu_map *cpus = evlist->core.cpus;
 	bool privileged = perf_event_paranoid_check(-1);
 	struct evsel *tracking_evsel;
 	int err;
@@ -120,9 +121,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	 */
 	perf_evlist__to_front(evlist, arm_spe_evsel);
 
-	evsel__set_sample_bit(arm_spe_evsel, CPU);
-	evsel__set_sample_bit(arm_spe_evsel, TIME);
-	evsel__set_sample_bit(arm_spe_evsel, TID);
+	/* In the case of per-cpu mmaps, sample CPU for AUX event. 
*/ + if (!perf_cpu_map__empty(cpus)) + evsel__set_sample_bit(arm_spe_evsel, CPU); /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); -- Gitee From b198ae22cbe6f48b6a69b908675d2e80d4c743a4 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:32 +0800 Subject: [PATCH 103/173] perf arm-spe: Correct sample flags for dummy event [Upstream commit e582badf1706a005359ef3e184fa97384e93f83a] The dummy event is mainly used for mmap, the TIME sample is only needed for per-cpu case so that the perf tool can rely on the correct timing for parsing symbols. And the CPU sample is useless for mmap. The BRANCH_STACK sample bit will be always reset for the dummy event in the function evsel__config(), so don't need to repeatedly reset it for Arm SPE specific. So this patch only enables TIME sample for per-cpu mmap. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519041546.1574961-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/arm-spe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 35e7c7e774c2..119d8d7656b9 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -135,9 +135,10 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - evsel__set_sample_bit(tracking_evsel, TIME); - evsel__set_sample_bit(tracking_evsel, CPU); - evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + + /* In per-cpu case, always need the time of mmap events etc */ + if (!perf_cpu_map__empty(cpus)) + evsel__set_sample_bit(tracking_evsel, TIME); return 0; } -- Gitee From 926412c59d712edd7d46f7c76d17b8d3a5eaea1c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2020 13:11:24 +0800 Subject: [PATCH 104/173] sched_clock: Expose struct clock_read_data [Upstream commit 1b86abc1c645ad5c9c7bf70910cb3ce73939d2d7] In order to support perf_event_mmap_page::cap_time features, an architecture needs, aside from a userspace readable counter register, to expose the exact clock data so that userspace can convert the counter register into a correct timestamp. Provide struct clock_read_data and two (seqcount) helpers so that architectures (arm64 in specific) can expose the numbers to userspace. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-2-leo.yan@linaro.org Signed-off-by: Will Deacon Signed-off-by: huwentao --- include/linux/sched_clock.h | 28 +++++++++++++++++++++++++ kernel/time/sched_clock.c | 41 ++++++++++++------------------------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 0bb04a96a6d4..528718e4ed52 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -6,6 +6,34 @@ #define LINUX_SCHED_CLOCK #ifdef CONFIG_GENERIC_SCHED_CLOCK +/** + * struct clock_read_data - data required to read from sched_clock() + * + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. 
+ * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=40 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. + */ +struct clock_read_data { + u64 epoch_ns; + u64 epoch_cyc; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); + u32 mult; + u32 shift; +}; + +extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq); +extern int sched_clock_read_retry(unsigned int seq); + extern void generic_sched_clock_init(void); extern void sched_clock_register(u64 (*read)(void), int bits, diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a5538dd76a81..fb3e4b9fd1bc 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -19,31 +19,6 @@ #include "timekeeping.h" -/** - * struct clock_read_data - data required to read from sched_clock() - * - * @epoch_ns: sched_clock() value at last update - * @epoch_cyc: Clock cycle value at last update. - * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit - * clocks. - * @read_sched_clock: Current clock source (or dummy source when suspended). - * @mult: Multipler for scaled math conversion. - * @shift: Shift value for scaled math conversion. - * - * Care must be taken when updating this structure; it is read by - * some very hot code paths. It occupies <=40 bytes and, when combined - * with the seqcount used to synchronize access, comfortably fits into - * a 64 byte cache line. - */ -struct clock_read_data { - u64 epoch_ns; - u64 epoch_cyc; - u64 sched_clock_mask; - u64 (*read_sched_clock)(void); - u32 mult; - u32 shift; -}; - /** * struct clock_data - all data needed for sched_clock() (including * registration of a new clock source) @@ -93,6 +68,17 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) return (cyc * mult) >> shift; } +struct clock_read_data *sched_clock_read_begin(unsigned int *seq) +{ + *seq = raw_read_seqcount(&cd.seq); + return cd.read_data + (*seq & 1); +} + +int sched_clock_read_retry(unsigned int seq) +{ + return read_seqcount_retry(&cd.seq, seq); +} + unsigned long long notrace sched_clock(void) { u64 cyc, res; @@ -100,13 +86,12 @@ unsigned long long notrace sched_clock(void) struct clock_read_data *rd; do { - seq = raw_read_seqcount(&cd.seq); - rd = cd.read_data + (seq & 1); + rd = sched_clock_read_begin(&seq); cyc = (rd->read_sched_clock() - rd->epoch_cyc) & rd->sched_clock_mask; res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); - } while (read_seqcount_retry(&cd.seq, seq)); + } while (sched_clock_read_retry(seq)); return res; } -- Gitee From 514db0bb4a934d281b6535c46882c0b5e9d7bfc3 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 16 Jul 2020 13:11:25 +0800 Subject: [PATCH 105/173] time/sched_clock: Use raw_read_seqcount_latch() [Upstream commit aadd6e5caaacd6feca9691ba30536e7de5a7d152] sched_clock uses seqcount_t latching to switch between two storage places protected by the sequence counter. This allows it to have interruptible, NMI-safe, seqcount_t write side critical sections. Since 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()"), raw_read_seqcount_latch() became the standardized way for seqcount_t latch read paths. 
Due to the dependent load, it also has one read memory barrier less than the currently used raw_read_seqcount() API. Use raw_read_seqcount_latch() for the seqcount_t latch read path. Signed-off-by: Ahmed S. Darwish Signed-off-by: Leo Yan Link: https://lkml.kernel.org/r/20200625085745.GD117543@hirez.programming.kicks-ass.net Link: https://lkml.kernel.org/r/20200715092345.GA231464@debian-buster-darwi.lab.linutronix.de Link: https://lore.kernel.org/r/20200716051130.4359-3-leo.yan@linaro.org References: 1809bfa44e10 ("timers, sched/clock: Avoid deadlock during read from NMI") Signed-off-by: Will Deacon Signed-off-by: huwentao --- kernel/time/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index fb3e4b9fd1bc..0bd10f4c975f 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -70,7 +70,7 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) struct clock_read_data *sched_clock_read_begin(unsigned int *seq) { - *seq = raw_read_seqcount(&cd.seq); + *seq = raw_read_seqcount_latch(&cd.seq); return cd.read_data + (*seq & 1); } -- Gitee From 6217cd88ec1baa79d7f59eab8c980f70c1133db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2020 13:11:26 +0800 Subject: [PATCH 106/173] arm64: perf: Implement correct cap_user_time [Upstream commit 950b74ddefc4a42add8b1ae0170aa309338ffe73] As reported by Leo; the existing implementation is broken when the clock and counter don't intersect at 0. Use the sched_clock's struct clock_read_data information to correctly implement cap_user_time and cap_user_time_zero. Note that the ARM64 counter is architecturally only guaranteed to be 56bit wide (implementations are allowed to be wider) and the existing perf ABI cannot deal with wrap-around. This implementation should also be faster than the old; seeing how we don't need to recompute mult and shift all the time. [leoyan: Use mul_u64_u32_shr() to convert cyc to ns to avoid overflow] Reported-by: Leo Yan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-4-leo.yan@linaro.org Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 38 ++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 0b461bc3d06c..bb116079a681 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1372,28 +1373,47 @@ device_initcall(armv8_pmu_driver_init) void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { - u32 freq; - u32 shift; + struct clock_read_data *rd; + unsigned int seq; + u64 ns; /* * Internal timekeeping for enabled/running/stopped times * is always computed with the sched_clock. */ - freq = arch_timer_get_rate(); userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; + + do { + rd = sched_clock_read_begin(&seq); + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + + /* + * This isn't strictly correct, the ARM64 counter can be + * 'short' and then we get funnies when it wraps. The correct + * thing would be to extend the perf ABI with a cycle and mask + * value, but because wrapping on ARM64 is very rare in + * practise this 'works'. 
+ */ + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); + userpg->time_zero -= ns; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; - clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, - NSEC_PER_SEC, 0); /* * time_shift is not expected to be greater than 31 due to * the original published conversion algorithm shifting a * 32-bit value (now specifies a 64-bit value) - refer * perf_event_mmap_page documentation in perf_event.h. */ - if (shift == 32) { - shift = 31; + if (userpg->time_shift == 32) { + userpg->time_shift = 31; userpg->time_mult >>= 1; } - userpg->time_shift = (u16)shift; - userpg->time_offset = -now; + } -- Gitee From 7a45c46f2e989f064aac5baba48eaa970bfcee9c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2020 13:11:27 +0800 Subject: [PATCH 107/173] arm64: perf: Only advertise cap_user_time for arch_timer [Upstream commit 279a811eb520594fac3cd3a541e6c7ea50072ac9] When sched_clock is running on anything other than arch_timer, don't advertise cap_user_time*. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-5-leo.yan@linaro.org Requested-by: Will Deacon Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index bb116079a681..7640084686f4 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -13,6 +13,8 @@ #include #include +#include + #include #include #include @@ -1377,16 +1379,15 @@ void arch_perf_update_userpage(struct perf_event *event, unsigned int seq; u64 ns; - /* - * Internal timekeeping for enabled/running/stopped times - * is always computed with the sched_clock. - */ - userpg->cap_user_time = 1; - userpg->cap_user_time_zero = 1; + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; do { rd = sched_clock_read_begin(&seq); + if (rd->read_sched_clock != arch_timer_read_counter) + return; + userpg->time_mult = rd->mult; userpg->time_shift = rd->shift; userpg->time_zero = rd->epoch_ns; @@ -1416,4 +1417,10 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->time_mult >>= 1; } + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; } -- Gitee From b760da47ae52f6878006e367e4abf65f075b4c6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2020 13:11:28 +0800 Subject: [PATCH 108/173] perf: Add perf_event_mmap_page::cap_user_time_short ABI [Upstream commit 6c0246a4588d418f72acd40a7b7601be403d80a9] In order to support short clock counters, provide an ABI extension. 
As a whole: u64 time, delta, cyc = read_cycle_counter(); + if (cap_user_time_short) + cyc = time_cycle + ((cyc - time_cycle) & time_mask); delta = mul_u64_u32_shr(cyc, time_mult, time_shift); if (cap_user_time_zero) time = time_zero + delta; delta += time_offset; Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-6-leo.yan@linaro.org Signed-off-by: Will Deacon Signed-off-by: huwentao --- include/uapi/linux/perf_event.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index f7fb6c83e030..561de8362ccc 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -570,9 +570,10 @@ struct perf_event_mmap_page { cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ - cap_user_time : 1, /* The time_* fields are used */ + cap_user_time : 1, /* The time_{shift,mult,offset} fields are used */ cap_user_time_zero : 1, /* The time_zero field is used */ - cap_____res : 59; + cap_user_time_short : 1, /* the time_{cycle,mask} fields are used */ + cap_____res : 58; }; }; @@ -631,13 +632,29 @@ struct perf_event_mmap_page { * ((rem * time_mult) >> time_shift); */ __u64 time_zero; + __u32 size; /* Header size up to __reserved[] fields. */ + __u32 __reserved_1; + + /* + * If cap_usr_time_short, the hardware clock is less than 64bit wide + * and we must compute the 'cyc' value, as used by cap_usr_time, as: + * + * cyc = time_cycles + ((cyc - time_cycles) & time_mask) + * + * NOTE: this form is explicitly chosen such that cap_usr_time_short + * is a correction on top of cap_usr_time, and code that doesn't + * know about cap_usr_time_short still works under the assumption + * the counter doesn't wrap. + */ + __u64 time_cycles; + __u64 time_mask; /* * Hole for extension of the self monitor capabilities */ - __u8 __reserved[118*8+4]; /* align to 1k. */ + __u8 __reserved[116*8]; /* align to 1k. */ /* * Control data for the mmap() data buffer. -- Gitee From 08fcb413cd335a0afe3f09112ff7b11b02987bf9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2020 13:11:29 +0800 Subject: [PATCH 109/173] arm64: perf: Add cap_user_time_short [Upstream commit c8f9eb0d6ebaa768c9f6eb2ee21b01d74230934d] This completes the ARM64 cap_user_time support. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-7-leo.yan@linaro.org Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 7640084686f4..dc3c99b304d0 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1381,6 +1381,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; + userpg->cap_user_time_short = 0; do { rd = sched_clock_read_begin(&seq); @@ -1391,13 +1392,13 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->time_mult = rd->mult; userpg->time_shift = rd->shift; userpg->time_zero = rd->epoch_ns; + userpg->time_cycles = rd->epoch_cyc; + userpg->time_mask = rd->sched_clock_mask; /* - * This isn't strictly correct, the ARM64 counter can be - * 'short' and then we get funnies when it wraps. 
The correct - * thing would be to extend the perf ABI with a cycle and mask - * value, but because wrapping on ARM64 is very rare in - * practise this 'works'. + * Subtract the cycle base, such that software that + * doesn't know about cap_user_time_short still 'works' + * assuming no wraps. */ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); userpg->time_zero -= ns; @@ -1423,4 +1424,5 @@ void arch_perf_update_userpage(struct perf_event *event, */ userpg->cap_user_time = 1; userpg->cap_user_time_zero = 1; + userpg->cap_user_time_short = 1; } -- Gitee From e7bf4283ee6ddb5d0fc2a6a0bd4e9d17486eebbe Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 16 Jul 2020 13:11:30 +0800 Subject: [PATCH 110/173] tools headers UAPI: Update tools's copy of linux/perf_event.h [Upstream commit 5271d915a99c696a2f16ae59cf6a037be35afa22] To get the changes in the commit: "perf: Add perf_event_mmap_page::cap_user_time_short ABI" This update is a prerequisite to add support for short clock counters related ABI extension. Signed-off-by: Leo Yan Link: https://lore.kernel.org/r/20200716051130.4359-8-leo.yan@linaro.org Signed-off-by: Will Deacon Signed-off-by: huwentao --- tools/include/uapi/linux/perf_event.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 59a1c0172470..394d633800b6 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -553,9 +553,10 @@ struct perf_event_mmap_page { cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ - cap_user_time : 1, /* The time_* fields are used */ + cap_user_time : 1, /* The time_{shift,mult,offset} fields are used */ cap_user_time_zero : 1, /* The time_zero field is used */ - cap_____res : 59; + cap_user_time_short : 1, /* the time_{cycle,mask} fields are used */ + cap_____res : 58; }; }; @@ -614,13 +615,29 @@ struct perf_event_mmap_page { * ((rem * time_mult) >> time_shift); */ __u64 time_zero; + __u32 size; /* Header size up to __reserved[] fields. */ + __u32 __reserved_1; + + /* + * If cap_usr_time_short, the hardware clock is less than 64bit wide + * and we must compute the 'cyc' value, as used by cap_usr_time, as: + * + * cyc = time_cycles + ((cyc - time_cycles) & time_mask) + * + * NOTE: this form is explicitly chosen such that cap_usr_time_short + * is a correction on top of cap_usr_time, and code that doesn't + * know about cap_usr_time_short still works under the assumption + * the counter doesn't wrap. + */ + __u64 time_cycles; + __u64 time_mask; /* * Hole for extension of the self monitor capabilities */ - __u8 __reserved[118*8+4]; /* align to 1k. */ + __u8 __reserved[116*8]; /* align to 1k. */ /* * Control data for the mmap() data buffer. -- Gitee From c049ced07656ae38de14035dd099181be47de732 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:06 +0800 Subject: [PATCH 111/173] perf tsc: Move out common functions from x86 [Upstream commit 03fca3af51703bd77ed14c88c3d5733840a69f3f] Functions perf_read_tsc_conversion() and perf_event__synth_time_conv() should work as common functions rather than x86 specific, so move these two functions out from arch/x86 folder and place them into util/tsc.c. Since the function perf_event__synth_time_conv() will be linked in util/tsc.c, remove its weak version. 
Committer testing: Before/after: # perf test tsc 70: Convert perf time to TSC : Ok # # perf test -v tsc 70: Convert perf time to TSC : --- start --- test child forked, pid 8520 mmap size 528384B 1st event perf time 592110439891 tsc 2317172044331 rdtsc time 592110441915 tsc 2317172052010 2nd event perf time 592110442336 tsc 2317172053605 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok # Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Link: http://lore.kernel.org/lkml/20200914115311.2201-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/x86/util/tsc.c | 73 +----------------------------- tools/perf/util/synthetic-events.c | 8 ---- tools/perf/util/tsc.c | 71 +++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 80 deletions(-) diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 2f55afb14e1f..559365f8fe52 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -1,45 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include -#include - #include -#include -#include "../../../util/debug.h" -#include "../../../util/event.h" -#include "../../../util/synthetic-events.h" -#include "../../../util/tsc.h" - -int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, - struct perf_tsc_conversion *tc) -{ - bool cap_user_time_zero; - u32 seq; - int i = 0; - - while (1) { - seq = pc->lock; - rmb(); - tc->time_mult = pc->time_mult; - tc->time_shift = pc->time_shift; - tc->time_zero = pc->time_zero; - cap_user_time_zero = pc->cap_user_time_zero; - rmb(); - if (pc->lock == seq && !(seq & 1)) - break; - if (++i > 10000) { - pr_debug("failed to get perf_event_mmap_page lock\n"); - return -EINVAL; - } - } - if (!cap_user_time_zero) - return -EOPNOTSUPP; - - return 0; -} +#include "../../../util/tsc.h" u64 rdtsc(void) { @@ -49,36 +11,3 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } - -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, - struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event event = { - .time_conv = { - .header = { - .type = PERF_RECORD_TIME_CONV, - .size = sizeof(struct perf_record_time_conv), - }, - }, - }; - struct perf_tsc_conversion tc; - int err; - - if (!pc) - return 0; - err = perf_read_tsc_conversion(pc, &tc); - if (err == -EOPNOTSUPP) - return 0; - if (err) - return err; - - pr_debug2("Synthesizing TSC conversion information\n"); - - event.time_conv.time_mult = tc.time_mult; - event.time_conv.time_shift = tc.time_shift; - event.time_conv.time_zero = tc.time_zero; - - return process(tool, &event, NULL, machine); -} diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 96e2adca2719..b18cf5d1af44 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1852,14 +1852,6 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p return 0; } -int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, - struct perf_tool *tool __maybe_unused, - perf_event__handler_t 
process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index bfa782421cbd..9e3f04ddddf8 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,7 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 +#include + #include +#include +#include #include +#include + +#include "event.h" +#include "synthetic-events.h" +#include "debug.h" #include "tsc.h" u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc) @@ -25,6 +34,68 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) ((rem * tc->time_mult) >> tc->time_shift); } +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc) +{ + bool cap_user_time_zero; + u32 seq; + int i = 0; + + while (1) { + seq = pc->lock; + rmb(); + tc->time_mult = pc->time_mult; + tc->time_shift = pc->time_shift; + tc->time_zero = pc->time_zero; + cap_user_time_zero = pc->cap_user_time_zero; + rmb(); + if (pc->lock == seq && !(seq & 1)) + break; + if (++i > 10000) { + pr_debug("failed to get perf_event_mmap_page lock\n"); + return -EINVAL; + } + } + + if (!cap_user_time_zero) + return -EOPNOTSUPP; + + return 0; +} + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct perf_record_time_conv), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + if (!pc) + return 0; + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + + return process(tool, &event, NULL, machine); +} + u64 __weak rdtsc(void) { return 0; -- Gitee From c5e0a79fa46aeff13d498c4c69e0fd4a173738f7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:07 +0800 Subject: [PATCH 112/173] perf tsc: Add rdtsc() for Arm64 [Upstream commit 4979e861415d59a479c7376e16d90df83da94bb1] The system register CNTVCT_EL0 can be used to retrieve the counter from user space. Add rdtsc() for Arm64. Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. 
Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200914115311.2201-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/tsc.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tools/perf/arch/arm64/util/tsc.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b4a627a37d11..e4c044686116 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,7 @@ perf-y += header.o perf-y += perf_regs.o perf-y += sym-handling.o +perf-y += tsc.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c new file mode 100644 index 000000000000..cc85bd9e73f1 --- /dev/null +++ b/tools/perf/arch/arm64/util/tsc.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "../../../util/tsc.h" + +u64 rdtsc(void) +{ + u64 val; + + /* + * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the + * system counter is at least 56 bits wide; from Armv8.6, the counter + * must be 64 bits wide. So the system counter could be less than 64 + * bits wide and it is attributed with the flag 'cap_user_time_short' + * is true. + */ + asm volatile("mrs %0, cntvct_el0" : "=r" (val)); + + return val; +} -- Gitee From eeca60b87381fc38dd1e904c095c714f7c0f996a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:08 +0800 Subject: [PATCH 113/173] perf tsc: Calculate timestamp with cap_user_time_short [Upstream commit 78a93d4cec6bc7c6011319d2b8f6509d96de186f] The perf mmap'ed buffer contains the flag 'cap_user_time_short' and two extra fields 'time_cycles' and 'time_mask', perf tool needs to know them for handling the counter wrapping case. This patch is to reads out the relevant parameters from the head of the first mmap'ed page and stores into the structure 'perf_tsc_conversion', if the flag 'cap_user_time_short' has been set, it will firstly calibrate cycle value for timestamp calculation. Committer testing: Before/after: # perf test tsc 70: Convert perf time to TSC : Ok # # perf test -v tsc 70: Convert perf time to TSC : --- start --- test child forked, pid 11059 mmap size 528384B 1st event perf time 996384576521 tsc 3850532906613 rdtsc time 996384578455 tsc 3850532913950 2nd event perf time 996384578845 tsc 3850532915428 test child finished with 0 ---- end ---- Convert perf time to TSC: Ok # Signed-off-by: Leo Yan Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. 
Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200914115311.2201-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/tsc.c | 12 +++++++++--- tools/perf/util/tsc.h | 5 +++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 9e3f04ddddf8..c0ca40204649 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -28,6 +28,10 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) { u64 quot, rem; + if (tc->cap_user_time_short) + cyc = tc->time_cycles + + ((cyc - tc->time_cycles) & tc->time_mask); + quot = cyc >> tc->time_shift; rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + @@ -37,7 +41,6 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) { - bool cap_user_time_zero; u32 seq; int i = 0; @@ -47,7 +50,10 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, tc->time_mult = pc->time_mult; tc->time_shift = pc->time_shift; tc->time_zero = pc->time_zero; - cap_user_time_zero = pc->cap_user_time_zero; + tc->time_cycles = pc->time_cycles; + tc->time_mask = pc->time_mask; + tc->cap_user_time_zero = pc->cap_user_time_zero; + tc->cap_user_time_short = pc->cap_user_time_short; rmb(); if (pc->lock == seq && !(seq & 1)) break; @@ -57,7 +63,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, } } - if (!cap_user_time_zero) + if (!tc->cap_user_time_zero) return -EOPNOTSUPP; return 0; diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 3c5a632ee57c..72a15419f3b3 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -8,6 +8,11 @@ struct perf_tsc_conversion { u16 time_shift; u32 time_mult; u64 time_zero; + u64 time_cycles; + u64 time_mask; + + bool cap_user_time_zero; + bool cap_user_time_short; }; struct perf_event_mmap_page; -- Gitee From 4e402d0d232810620ae95bb47c2a4fa14c4286ca Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 14 Sep 2020 19:53:09 +0800 Subject: [PATCH 114/173] perf tsc: Support cap_user_time_short for event TIME_CONV [Upstream commit d110162cafc80dad0622cfd40f3113aebb77e1bb] The synthesized event TIME_CONV doesn't contain the complete parameters for counters, this will lead to wrong conversion between counter cycles and timestamp. This patch extends event TIME_CONV to record flags 'cap_user_time_zero' which is used to indicate the counter parameters are valid or not, if not will directly return 0 for timestamp calculation. And record the flag 'cap_user_time_short' and its relevant fields 'time_cycles' and 'time_mask' for cycle calibration. Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Gustavo A. R. 
Silva Cc: Ian Rogers Cc: Jiri Olsa Cc: John Garry Cc: Kemeng Shi Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Gasson Cc: Peter Zijlstra Cc: Remi Bernon Cc: Stephane Eranian Cc: Steve Maclean Cc: Will Deacon Cc: Zou Wei Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20200914115311.2201-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/lib/include/perf/event.h | 4 ++++ tools/perf/util/jitdump.c | 14 +++++++++----- tools/perf/util/tsc.c | 4 ++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/perf/lib/include/perf/event.h b/tools/perf/lib/include/perf/event.h index 69b44d2cc0f5..d5be1dda827f 100644 --- a/tools/perf/lib/include/perf/event.h +++ b/tools/perf/lib/include/perf/event.h @@ -316,6 +316,10 @@ struct perf_record_time_conv { __u64 time_shift; __u64 time_mult; __u64 time_zero; + __u64 time_cycles; + __u64 time_mask; + bool cap_user_time_zero; + bool cap_user_time_short; }; struct perf_record_header_feature { diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index e3ccb0ce1938..759864037587 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -373,11 +373,15 @@ static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) if (!jd->use_arch_timestamp) return timestamp; - tc.time_shift = jd->session->time_conv.time_shift; - tc.time_mult = jd->session->time_conv.time_mult; - tc.time_zero = jd->session->time_conv.time_zero; - - if (!tc.time_mult) + tc.time_shift = jd->session->time_conv.time_shift; + tc.time_mult = jd->session->time_conv.time_mult; + tc.time_zero = jd->session->time_conv.time_zero; + tc.time_cycles = jd->session->time_conv.time_cycles; + tc.time_mask = jd->session->time_conv.time_mask; + tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero; + tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short; + + if (!tc.cap_user_time_zero) return 0; return tsc_to_perf_time(timestamp, &tc); diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index c0ca40204649..62b4c75c966c 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -98,6 +98,10 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, event.time_conv.time_mult = tc.time_mult; event.time_conv.time_shift = tc.time_shift; event.time_conv.time_zero = tc.time_zero; + event.time_conv.time_cycles = tc.time_cycles; + event.time_conv.time_mask = tc.time_mask; + event.time_conv.cap_user_time_zero = tc.cap_user_time_zero; + event.time_conv.cap_user_time_short = tc.cap_user_time_short; return process(tool, &event, NULL, machine); } -- Gitee From f1139e355dfd628d984372c7874fe77f6b3249ba Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:12 +0800 Subject: [PATCH 115/173] perf tools: Change fields type in perf_record_time_conv [Upstream commit e1d380ea8b00db4bb14d1f513000d4b62aa9d3f0] C standard claims "An object declared as type _Bool is large enough to store the values 0 and 1", bool type size can be 1 byte or larger than 1 byte. Thus it's uncertian for bool type size with different compilers. This patch changes the bool type in structure perf_record_time_conv to __u8 type, and pads extra bytes for 8-byte alignment; this can give reliable structure size. Fixes: d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV") Suggested-by: Adrian Hunter Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. 
Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/lib/include/perf/event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/lib/include/perf/event.h b/tools/perf/lib/include/perf/event.h index d5be1dda827f..f106e479f8c0 100644 --- a/tools/perf/lib/include/perf/event.h +++ b/tools/perf/lib/include/perf/event.h @@ -318,8 +318,9 @@ struct perf_record_time_conv { __u64 time_zero; __u64 time_cycles; __u64 time_mask; - bool cap_user_time_zero; - bool cap_user_time_short; + __u8 cap_user_time_zero; + __u8 cap_user_time_short; + __u8 reserved[6]; /* For alignment */ }; struct perf_record_header_feature { -- Gitee From 28b818a47644f93b2de149f4ea5c5f90a692968a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:13 +0800 Subject: [PATCH 116/173] perf jit: Let convert_timestamp() to be backwards-compatible [Upstream commit aa616f5a8a2d22a179d5502ebd85045af66fa656] Commit d110162cafc80dad ("perf tsc: Support cap_user_time_short for event TIME_CONV") supports the extended parameters for event TIME_CONV, but it broke the backwards compatibility, so any perf data file with old event format fails to convert timestamp. This patch introduces a helper event_contains() to check if an event contains a specific member or not. For the backwards-compatibility, if the event size confirms the extended parameters are supported in the event TIME_CONV, then copies these parameters. Committer notes: To make this compiler backwards compatible add this patch: - struct perf_tsc_conversion tc = { 0 }; + struct perf_tsc_conversion tc = { .time_shift = 0, }; Fixes: d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. 
Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/lib/include/perf/event.h | 2 ++ tools/perf/util/jitdump.c | 30 +++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/perf/lib/include/perf/event.h b/tools/perf/lib/include/perf/event.h index f106e479f8c0..4768d5035813 100644 --- a/tools/perf/lib/include/perf/event.h +++ b/tools/perf/lib/include/perf/event.h @@ -8,6 +8,8 @@ #include #include /* pid_t */ +#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem)) + struct perf_record_mmap { struct perf_event_header header; __u32 pid, tid; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 759864037587..35871d080e82 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -368,21 +368,31 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) { - struct perf_tsc_conversion tc; + struct perf_tsc_conversion tc = { .time_shift = 0, }; + struct perf_record_time_conv *time_conv = &jd->session->time_conv; if (!jd->use_arch_timestamp) return timestamp; - tc.time_shift = jd->session->time_conv.time_shift; - tc.time_mult = jd->session->time_conv.time_mult; - tc.time_zero = jd->session->time_conv.time_zero; - tc.time_cycles = jd->session->time_conv.time_cycles; - tc.time_mask = jd->session->time_conv.time_mask; - tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero; - tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short; + tc.time_shift = time_conv->time_shift; + tc.time_mult = time_conv->time_mult; + tc.time_zero = time_conv->time_zero; - if (!tc.cap_user_time_zero) - return 0; + /* + * The event TIME_CONV was extended for the fields from "time_cycles" + * when supported cap_user_time_short, for backward compatibility, + * checks the event size and assigns these extended fields if these + * fields are contained in the event. + */ + if (event_contains(*time_conv, time_cycles)) { + tc.time_cycles = time_conv->time_cycles; + tc.time_mask = time_conv->time_mask; + tc.cap_user_time_zero = time_conv->cap_user_time_zero; + tc.cap_user_time_short = time_conv->cap_user_time_short; + + if (!tc.cap_user_time_zero) + return 0; + } return tsc_to_perf_time(timestamp, &tc); } -- Gitee From 5de7bb9d246ed39da4af1c8998538d68789f74cb Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:14 +0800 Subject: [PATCH 117/173] perf session: Add swap operation for event TIME_CONV [Upstream commit 050ffc449008eeeafc187dec337d9cf1518f89bc] Since commit d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV"), the event PERF_RECORD_TIME_CONV has extended the data structure for clock parameters. To be backwards-compatible, this patch adds a dedicated swap operation for the event PERF_RECORD_TIME_CONV, based on checking if the event contains field "time_cycles", it can support both for the old and new event formats. Fixes: d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. 
Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/session.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbf1d7611bf8..3acb2624903c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -911,6 +911,19 @@ static void perf_event__stat_round_swap(union perf_event *event, event->stat_round.time = bswap_64(event->stat_round.time); } +static void perf_event__time_conv_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->time_conv.time_shift = bswap_64(event->time_conv.time_shift); + event->time_conv.time_mult = bswap_64(event->time_conv.time_mult); + event->time_conv.time_zero = bswap_64(event->time_conv.time_zero); + + if (event_contains(event->time_conv, time_cycles)) { + event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles); + event->time_conv.time_mask = bswap_64(event->time_conv.time_mask); + } +} + typedef void (*perf_event__swap_op)(union perf_event *event, bool sample_id_all); @@ -945,7 +958,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_STAT] = perf_event__stat_swap, [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, - [PERF_RECORD_TIME_CONV] = perf_event__all64_swap, + [PERF_RECORD_TIME_CONV] = perf_event__time_conv_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; -- Gitee From e95dd007242149683d79d8a6cce804a68b7370a9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:15 +0800 Subject: [PATCH 118/173] perf session: Dump PERF_RECORD_TIME_CONV event [Upstream commit 81e70d7ee4ae13d60800958bca9d3c7675de16c9] Now perf tool uses the common stub function process_event_op2_stub() for dumping TIME_CONV event, thus it doesn't output the clock parameters contained in the event. This patch adds the callback function for dumping the hardware clock parameters in TIME_CONV event. Before: # perf report -D 0x978 [0x38]: event: 79 . . ... raw event: size 56 bytes . 0000: 4f 00 00 00 00 00 38 00 15 00 00 00 00 00 00 00 O.....8......... . 0010: 00 00 40 01 00 00 00 00 86 89 0b bf df ff ff ff ..@........ . 0020: d1 c1 b2 39 03 00 00 00 ff ff ff ff ff ff ff 00 9..... . 0030: 01 01 00 00 00 00 00 00 ........ 0 0 0x978 [0x38]: PERF_RECORD_TIME_CONV : unhandled! [...] After: # perf report -D 0x978 [0x38]: event: 79 . . ... raw event: size 56 bytes . 0000: 4f 00 00 00 00 00 38 00 15 00 00 00 00 00 00 00 O.....8......... . 0010: 00 00 40 01 00 00 00 00 86 89 0b bf df ff ff ff ..@........ . 0020: d1 c1 b2 39 03 00 00 00 ff ff ff ff ff ff ff 00 9..... . 0030: 01 01 00 00 00 00 00 00 ........ 0 0 0x978 [0x38]: PERF_RECORD_TIME_CONV ... Time Shift 21 ... Time Muliplier 20971520 ... Time Zero 18446743935180835206 ... Time Cycles 13852918225 ... Time Mask 0xffffffffffffff ... Cap Time Zero 1 ... Cap Time Short 1 : unhandled! [...] Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. 
Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/session.c | 13 ++++++++++++- tools/perf/util/tsc.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/tsc.h | 4 ++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3acb2624903c..9c746b845f3f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -29,6 +29,7 @@ #include "thread-stack.h" #include "sample-raw.h" #include "stat.h" +#include "tsc.h" #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" @@ -448,6 +449,16 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu return 0; } +static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_time_conv(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, u64 file_offset __maybe_unused) @@ -527,7 +538,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->stat_round == NULL) tool->stat_round = process_stat_round_stub; if (tool->time_conv == NULL) - tool->time_conv = process_event_op2_stub; + tool->time_conv = process_event_time_conv_stub; if (tool->feature == NULL) tool->feature = process_event_op2_stub; if (tool->compressed == NULL) diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 62b4c75c966c..f19791d46e99 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include #include @@ -110,3 +112,31 @@ u64 __weak rdtsc(void) { return 0; } + +size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) +{ + struct perf_record_time_conv *tc = (struct perf_record_time_conv *)event; + size_t ret; + + ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); + ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult); + ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); + + /* + * The event TIME_CONV was extended for the fields from "time_cycles" + * when supported cap_user_time_short, for backward compatibility, + * prints the extended fields only if they are contained in the event. + */ + if (event_contains(*tc, time_cycles)) { + ret += fprintf(fp, "... Time Cycles %" PRI_lu64 "\n", + tc->time_cycles); + ret += fprintf(fp, "... Time Mask %#" PRI_lx64 "\n", + tc->time_mask); + ret += fprintf(fp, "... Cap Time Zero %" PRId32 "\n", + tc->cap_user_time_zero); + ret += fprintf(fp, "... 
Cap Time Short %" PRId32 "\n", + tc->cap_user_time_short); + } + + return ret; +} diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 72a15419f3b3..7d83a31732a7 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -4,6 +4,8 @@ #include +#include "event.h" + struct perf_tsc_conversion { u16 time_shift; u32 time_mult; @@ -24,4 +26,6 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); +size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); + #endif // __PERF_TSC_H -- Gitee From 7b37d7e36c0a63728a99ba5eb8e503435e0589ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 May 2020 13:05:08 -0300 Subject: [PATCH 119/173] perf evsel: Rename *perf_evsel__get_config_term() & friends to evsel__env() [Upstream commit 35ac0cad7d6c9d4b8c656bbe6d4136de07ecd14d] As it is a 'struct evsel' method, not part of tools/lib/perf/, aka libperf, to whom the perf_ prefix belongs. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm/util/cs-etm.c | 4 +-- tools/perf/builtin-top.c | 4 +-- tools/perf/util/auxtrace.c | 4 +-- tools/perf/util/evsel.c | 49 +++++++++++++++---------------- tools/perf/util/evsel_config.h | 45 ++++++++++++++-------------- tools/perf/util/parse-events.c | 12 ++++---- tools/perf/util/pmu.h | 2 +- 7 files changed, 59 insertions(+), 61 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 8cefba31143a..6dd617b1196b 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -216,7 +216,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, struct evsel *evsel) { char msg[BUFSIZ], path[PATH_MAX], *sink; - struct perf_evsel_config_term *term; + struct evsel_config_term *term; int ret = -EINVAL; u32 hash; @@ -224,7 +224,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, return 0; list_for_each_entry(term, &evsel->config_terms, list) { - if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) + if (term->type != EVSEL__CONFIG_TERM_DRV_CFG) continue; sink = term->val.str; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3892e56e7248..99152de16870 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -938,7 +938,7 @@ static int perf_top__overwrite_check(struct perf_top *top) { struct record_opts *opts = &top->record_opts; struct evlist *evlist = top->evlist; - struct perf_evsel_config_term *term; + struct evsel_config_term *term; struct list_head *config_terms; struct evsel *evsel; int set, overwrite = -1; @@ -947,7 +947,7 @@ static int perf_top__overwrite_check(struct perf_top *top) set = -1; config_terms = &evsel->config_terms; list_for_each_entry(term, config_terms, list) { - if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE) + if (term->type == EVSEL__CONFIG_TERM_OVERWRITE) set = term->val.overwrite ? 
1 : 0; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 036b9750f05d..8c1156c8d14a 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -726,7 +726,7 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts, const char *str) { - struct perf_evsel_config_term *term; + struct evsel_config_term *term; struct evsel *aux_evsel; bool has_aux_sample_size = false; bool has_aux_leader = false; @@ -768,7 +768,7 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, evlist__for_each_entry(evlist, evsel) { if (perf_evsel__is_aux_event(evsel)) aux_evsel = evsel; - term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); + term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); if (term) { has_aux_sample_size = true; evsel->core.attr.aux_sample_size = term->val.aux_sample_size; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 147855005d31..2a84926c3963 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -771,10 +771,10 @@ perf_evsel__reset_callgraph(struct evsel *evsel, } } -static void apply_config_terms(struct evsel *evsel, - struct record_opts *opts, bool track) +static void evsel__apply_config_terms(struct evsel *evsel, + struct record_opts *opts, bool track) { - struct perf_evsel_config_term *term; + struct evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; struct perf_event_attr *attr = &evsel->core.attr; /* callgraph default */ @@ -787,30 +787,30 @@ static void apply_config_terms(struct evsel *evsel, list_for_each_entry(term, config_terms, list) { switch (term->type) { - case PERF_EVSEL__CONFIG_TERM_PERIOD: + case EVSEL__CONFIG_TERM_PERIOD: if (!(term->weak && opts->user_interval != ULLONG_MAX)) { attr->sample_period = term->val.period; attr->freq = 0; evsel__reset_sample_bit(evsel, PERIOD); } break; - case PERF_EVSEL__CONFIG_TERM_FREQ: + case EVSEL__CONFIG_TERM_FREQ: if (!(term->weak && opts->user_freq != UINT_MAX)) { attr->sample_freq = term->val.freq; attr->freq = 1; evsel__set_sample_bit(evsel, PERIOD); } break; - case PERF_EVSEL__CONFIG_TERM_TIME: + case EVSEL__CONFIG_TERM_TIME: if (term->val.time) evsel__set_sample_bit(evsel, TIME); else evsel__reset_sample_bit(evsel, TIME); break; - case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: + case EVSEL__CONFIG_TERM_CALLGRAPH: callgraph_buf = term->val.str; break; - case PERF_EVSEL__CONFIG_TERM_BRANCH: + case EVSEL__CONFIG_TERM_BRANCH: if (term->val.str && strcmp(term->val.str, "no")) { evsel__set_sample_bit(evsel, BRANCH_STACK); parse_branch_str(term->val.str, @@ -818,16 +818,16 @@ static void apply_config_terms(struct evsel *evsel, } else evsel__reset_sample_bit(evsel, BRANCH_STACK); break; - case PERF_EVSEL__CONFIG_TERM_STACK_USER: + case EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; - case PERF_EVSEL__CONFIG_TERM_MAX_STACK: + case EVSEL__CONFIG_TERM_MAX_STACK: max_stack = term->val.max_stack; break; - case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: + case EVSEL__CONFIG_TERM_MAX_EVENTS: evsel->max_events = term->val.max_events; break; - case PERF_EVSEL__CONFIG_TERM_INHERIT: + case EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by * perf_evsel__config. If user explicitly set @@ -836,20 +836,20 @@ static void apply_config_terms(struct evsel *evsel, */ attr->inherit = term->val.inherit ? 1 : 0; break; - case PERF_EVSEL__CONFIG_TERM_OVERWRITE: + case EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 
1 : 0; break; - case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + case EVSEL__CONFIG_TERM_DRV_CFG: break; - case PERF_EVSEL__CONFIG_TERM_PERCORE: + case EVSEL__CONFIG_TERM_PERCORE: break; - case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT: + case EVSEL__CONFIG_TERM_AUX_OUTPUT: attr->aux_output = term->val.aux_output ? 1 : 0; break; - case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: + case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: /* Already applied by auxtrace */ break; - case PERF_EVSEL__CONFIG_TERM_CFG_CHG: + case EVSEL__CONFIG_TERM_CFG_CHG: break; default: break; @@ -910,10 +910,9 @@ static bool is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } -struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, - enum evsel_term_type type) +struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) { - struct perf_evsel_config_term *term, *found_term = NULL; + struct evsel_config_term *term, *found_term = NULL; list_for_each_entry(term, &evsel->config_terms, list) { if (term->type == type) @@ -1187,7 +1186,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * Apply event specific term settings, * it overloads any global configuration. */ - apply_config_terms(evsel, opts, track); + evsel__apply_config_terms(evsel, opts, track); evsel->ignore_missing_thread = opts->ignore_missing_thread; @@ -1275,9 +1274,9 @@ int evsel__disable(struct evsel *evsel) return err; } -static void perf_evsel__free_config_terms(struct evsel *evsel) +static void evsel__free_config_terms(struct evsel *evsel) { - struct perf_evsel_config_term *term, *h; + struct evsel_config_term *term, *h; list_for_each_entry_safe(term, h, &evsel->config_terms, list) { list_del_init(&term->list); @@ -1292,7 +1291,7 @@ void perf_evsel__exit(struct evsel *evsel) perf_evsel__free_counts(evsel); perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); - perf_evsel__free_config_terms(evsel); + evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); perf_cpu_map__put(evsel->core.cpus); perf_cpu_map__put(evsel->core.own_cpus); diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index b4a65201e4f7..2e7093bc5a1a 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -6,30 +6,30 @@ #include /* - * The 'struct perf_evsel_config_term' is used to pass event - * specific configuration data to perf_evsel__config routine. + * The 'struct evsel_config_term' is used to pass event + * specific configuration data to evsel__config routine. * It is allocated within event parsing and attached to - * perf_evsel::config_terms list head. + * evsel::config_terms list head. 
*/ enum evsel_term_type { - PERF_EVSEL__CONFIG_TERM_PERIOD, - PERF_EVSEL__CONFIG_TERM_FREQ, - PERF_EVSEL__CONFIG_TERM_TIME, - PERF_EVSEL__CONFIG_TERM_CALLGRAPH, - PERF_EVSEL__CONFIG_TERM_STACK_USER, - PERF_EVSEL__CONFIG_TERM_INHERIT, - PERF_EVSEL__CONFIG_TERM_MAX_STACK, - PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, - PERF_EVSEL__CONFIG_TERM_OVERWRITE, - PERF_EVSEL__CONFIG_TERM_DRV_CFG, - PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_PERCORE, - PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT, - PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, - PERF_EVSEL__CONFIG_TERM_CFG_CHG, + EVSEL__CONFIG_TERM_PERIOD, + EVSEL__CONFIG_TERM_FREQ, + EVSEL__CONFIG_TERM_TIME, + EVSEL__CONFIG_TERM_CALLGRAPH, + EVSEL__CONFIG_TERM_STACK_USER, + EVSEL__CONFIG_TERM_INHERIT, + EVSEL__CONFIG_TERM_MAX_STACK, + EVSEL__CONFIG_TERM_MAX_EVENTS, + EVSEL__CONFIG_TERM_OVERWRITE, + EVSEL__CONFIG_TERM_DRV_CFG, + EVSEL__CONFIG_TERM_BRANCH, + EVSEL__CONFIG_TERM_PERCORE, + EVSEL__CONFIG_TERM_AUX_OUTPUT, + EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, + EVSEL__CONFIG_TERM_CFG_CHG, }; -struct perf_evsel_config_term { +struct evsel_config_term { struct list_head list; enum evsel_term_type type; union { @@ -52,10 +52,9 @@ struct perf_evsel_config_term { struct evsel; -struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel, - enum evsel_term_type type); +struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type); -#define perf_evsel__get_config_term(evsel, type) \ - __perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type) +#define evsel__get_config_term(evsel, type) \ + __evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type) #endif // __PERF_EVSEL_CONFIG_H diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 27fd5501e053..2791530eefcb 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1204,14 +1204,14 @@ static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { #define ADD_CONFIG_TERM(__type) \ - struct perf_evsel_config_term *__t; \ + struct evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ if (!__t) \ return -ENOMEM; \ \ INIT_LIST_HEAD(&__t->list); \ - __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ + __t->type = EVSEL__CONFIG_TERM_ ## __type; \ __t->weak = term->weak; \ list_add_tail(&__t->list, head_terms) @@ -1297,7 +1297,7 @@ do { \ } /* - * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for + * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for * each bit of attr->config that the user has changed. 
*/ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, @@ -1385,10 +1385,10 @@ int parse_events_add_tool(struct parse_events_state *parse_state, static bool config_term_percore(struct list_head *config_terms) { - struct perf_evsel_config_term *term; + struct evsel_config_term *term; list_for_each_entry(term, config_terms, list) { - if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE) + if (term->type == EVSEL__CONFIG_TERM_PERCORE) return term->val.percore; } @@ -1463,7 +1463,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return -ENOMEM; if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { - struct perf_evsel_config_term *pos, *tmp; + struct evsel_config_term *pos, *tmp; list_for_each_entry_safe(pos, tmp, &config_terms, list) { list_del_init(&pos->list); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index e0cf540acac2..99b5c7f0392d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -9,7 +9,7 @@ #include #include "parse-events.h" -struct perf_evsel_config_term; +struct evsel_config_term; enum { PERF_PMU_FORMAT_VALUE_CONFIG, -- Gitee From 14d95931947c914caf97aa268db02aa4cd7cf560 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 11:37:29 -0300 Subject: [PATCH 120/173] tools headers: Adopt verbatim copy of compiletime_assert() from kernel sources [Upstream commit 5b992add7d32d3106f121c85ad99d2986dc3b8e6] Will be needed when syncing the linux/bits.h header, in the next cset. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/include/linux/compiler.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 1827c2f973f9..180f7714a5f1 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -10,6 +10,32 @@ # define __compiletime_error(message) #endif +#ifdef __OPTIMIZE__ +# define __compiletime_assert(condition, msg, prefix, suffix) \ + do { \ + extern void prefix ## suffix(void) __compiletime_error(msg); \ + if (!(condition)) \ + prefix ## suffix(); \ + } while (0) +#else +# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +#endif + +#define _compiletime_assert(condition, msg, prefix, suffix) \ + __compiletime_assert(condition, msg, prefix, suffix) + +/** + * compiletime_assert - break build and emit msg if condition is false + * @condition: a compile-time constant condition to check + * @msg: a message to emit if condition is false + * + * In tradition of POSIX assert, this macro will break the build if the + * supplied condition is *false*, emitting the supplied error message if the + * compiler has support to do so. 
+ */ +#define compiletime_assert(condition, msg) \ + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) + /* Optimization barrier */ /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") -- Gitee From dcdd4888f001c8c0e825f928d2efbd03e491260f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 08:52:23 -0300 Subject: [PATCH 121/173] tools headers: Update linux/vdso.h and grab a copy of vdso/const.h [Upstream commit ca64d84e93762f4e587e040a44ad9f6089afc777] To get in line with: 8165b57bca21 ("linux/const.h: Extract common header for vDSO") And silence this tools/perf/ build warning: Warning: Kernel ABI header at 'tools/include/linux/const.h' differs from latest version at 'include/linux/const.h' diff -u tools/include/linux/const.h include/linux/const.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Vincenzo Frascino Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/include/linux/const.h | 5 +---- tools/include/vdso/const.h | 10 ++++++++++ tools/perf/check-headers.sh | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 tools/include/vdso/const.h diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 7b55a55f5911..81b8aae5a855 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -1,9 +1,6 @@ #ifndef _LINUX_CONST_H #define _LINUX_CONST_H -#include - -#define UL(x) (_UL(x)) -#define ULL(x) (_ULL(x)) +#include #endif /* _LINUX_CONST_H */ diff --git a/tools/include/vdso/const.h b/tools/include/vdso/const.h new file mode 100644 index 000000000000..94b385ad438d --- /dev/null +++ b/tools/include/vdso/const.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_CONST_H +#define __VDSO_CONST_H + +#include + +#define UL(x) (_UL(x)) +#define ULL(x) (_ULL(x)) + +#endif /* __VDSO_CONST_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 9cea76bf0efa..b964925de95d 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -22,6 +22,7 @@ include/uapi/linux/vhost.h include/uapi/sound/asound.h include/linux/bits.h include/linux/const.h +include/vdso/const.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h -- Gitee From 821433b0a5becdf604c0d66579fa115d145d5f0e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Apr 2020 10:27:39 -0300 Subject: [PATCH 122/173] tools headers: Synchronize linux/bits.h with the kernel sources [Upstream commit e3698b23ecb8c099b4b523e7d5c8c042e93ef15d] To pick up the changes in these csets: 295bcca84916 ("linux/bits.h: add compile time sanity check of GENMASK inputs") 3945ff37d2f4 ("linux/bits.h: Extract common header for vDSO") To address this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/linux/bits.h' differs from latest version at 'include/linux/bits.h' diff -u tools/include/linux/bits.h include/linux/bits.h This clashes with usage of userspace's static_assert(), that, at least on glibc, is guarded by a ifnded/endif pair, do the same to our copy of build_bug.h and avoid that diff in check_headers.sh so that we continue checking for drifts with the kernel sources master copy. This will all be tested with the set of build containers that includes uCLibc, musl libc, lots of glibc versions in lots of distros and cross build environments. The tools/objtool, tools/bpf, etc were tested as well. 
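To make the behavioural change concrete (an illustrative sketch, not from the commit; the mask name is invented), the synchronized header turns swapped GENMASK() bounds into a build failure for compile-time-constant arguments:

	#include <linux/bits.h>

	#define CTRL_MODE_MASK	GENMASK(7, 4)	/* bits 7..4, i.e. 0xf0 - builds fine */

	/* GENMASK(4, 7) - low bit above high bit - now breaks the build:
	 * GENMASK_INPUT_CHECK() expands to BUILD_BUG_ON_ZERO((l) > (h)) for
	 * constant inputs, so the reversed mask is rejected at compile time
	 * instead of silently evaluating to a wrong value at runtime.
	 */
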
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Rikard Falkeborn Cc: Thomas Gleixner Cc: Vincenzo Frascino Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/include/linux/bits.h | 24 ++++++++-- tools/include/linux/build_bug.h | 82 +++++++++++++++++++++++++++++++++ tools/include/linux/kernel.h | 4 +- tools/include/vdso/bits.h | 9 ++++ tools/perf/check-headers.sh | 2 + 5 files changed, 115 insertions(+), 6 deletions(-) create mode 100644 tools/include/linux/build_bug.h create mode 100644 tools/include/vdso/bits.h diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 669d69441a62..4671fbf28842 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -3,9 +3,9 @@ #define __LINUX_BITS_H #include +#include #include -#define BIT(nr) (UL(1) << (nr)) #define BIT_ULL(nr) (ULL(1) << (nr)) #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -18,12 +18,30 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) \ +#if !defined(__ASSEMBLY__) && \ + (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#include +#define GENMASK_INPUT_CHECK(h, l) \ + (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __builtin_constant_p((l) > (h)), (l) > (h), 0))) +#else +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK_INPUT_CHECK(h, l) 0 +#endif + +#define __GENMASK(h, l) \ (((~UL(0)) - (UL(1) << (l)) + 1) & \ (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define GENMASK(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ +#define __GENMASK_ULL(h, l) \ (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define GENMASK_ULL(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h new file mode 100644 index 000000000000..cc7070c7439b --- /dev/null +++ b/tools/include/linux/build_bug.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BUILD_BUG_H +#define _LINUX_BUILD_BUG_H + +#include + +#ifdef __CHECKER__ +#define BUILD_BUG_ON_ZERO(e) (0) +#else /* __CHECKER__ */ +/* + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type int), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). + */ +#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) +#endif /* __CHECKER__ */ + +/* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + +/* + * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the + * expression but avoids the generation of any code, even if that expression + * has side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + +/** + * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied + * error message. + * @condition: the condition which the compiler should know is false. + * + * See BUILD_BUG_ON for description. 
+ */ +#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) + +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @condition: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + */ +#define BUILD_BUG_ON(condition) \ + BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) + +/** + * BUILD_BUG - break compile if used. + * + * If you have some code that you expect the compiler to eliminate at + * build time, you should use BUILD_BUG to detect if it is + * unexpectedly used. + */ +#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") + +/** + * static_assert - check integer constant expression at build time + * + * static_assert() is a wrapper for the C11 _Static_assert, with a + * little macro magic to make the message optional (defaulting to the + * stringification of the tested expression). + * + * Contrary to BUILD_BUG_ON(), static_assert() can be used at global + * scope, but requires the expression to be an integer constant + * expression (i.e., it is not enough that __builtin_constant_p() is + * true for expr). + * + * Also note that BUILD_BUG_ON() fails the build if the condition is + * true, while static_assert() fails the build if the expression is + * false. + */ +#ifndef static_assert +#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) +#define __static_assert(expr, msg, ...) _Static_assert(expr, msg) +#endif // static_assert + +#endif /* _LINUX_BUILD_BUG_H */ diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index cba226948a0c..a7e54a08fb54 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,6 @@ (type *)((char *)__mptr - offsetof(type, member)); }) #endif -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) - #ifndef max #define max(x, y) ({ \ typeof(x) _max1 = (x); \ diff --git a/tools/include/vdso/bits.h b/tools/include/vdso/bits.h new file mode 100644 index 000000000000..6d005a1f5d94 --- /dev/null +++ b/tools/include/vdso/bits.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_BITS_H +#define __VDSO_BITS_H + +#include + +#define BIT(nr) (UL(1) << (nr)) + +#endif /* __VDSO_BITS_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index b964925de95d..0472da3741ed 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -21,6 +21,7 @@ include/uapi/linux/usbdevice_fs.h include/uapi/linux/vhost.h include/uapi/sound/asound.h include/linux/bits.h +include/vdso/bits.h include/linux/const.h include/vdso/const.h include/linux/hash.h @@ -115,6 +116,7 @@ check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include "' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' +check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include " -B' check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"' -- Gitee From 79fdc0ee7ac5ce01762810735a30b1008af37528 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 11 Aug 2022 14:24:37 +0800 Subject: [PATCH 123/173] perf tools: Sync 
addition of PERF_MEM_SNOOPX_PEER [Upstream commit 2e21bcf0514a3623b41962bf424dec061c02ebc6] Add a flag to the 'perf mem' data struct to signal that a request caused a cache-to-cache transfer of a line from a peer of the requestor and wasn't sourced from a lower cache level. The line being moved from one peer cache to another has latency and performance implications. On Arm64 Neoverse systems the data source can indicate a cache-to-cache transfer but not if the line is dirty or clean, so instead of overloading HITM define a new flag that indicates this type of transfer. Committer notes: This really is not syncing with the kernel since the patch to the kernel wasn't merged. But we're going ahead of this as it seems trivial and is just a matter of the perf kernel maintainers to give their ack or for us to find another way of expressing this in the perf records synthesized in userspace from the ARM64 hardware traces. Reviewed-by: Leo Yan Signed-off-by: Ali Saidi Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: German Gomez Cc: Gustavo A. R. Silva Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Like Xu Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Timothy Hayes Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220811062451.435810-2-leo.yan@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 394d633800b6..ff263a800a63 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1213,7 +1213,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ #define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ -- Gitee From 98668b2256fa4f85b305d78648716322a30f0849 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:39 +0530 Subject: [PATCH 124/173] tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel [Upstream commit b7ddd38ccc723f0dca68151baed1e6c07c2a6005] Two new fields for mem_lvl_num has been introduced: PERF_MEM_LVLNUM_IO and PERF_MEM_LVLNUM_CXL which are required to support perf mem/c2c on AMD platform. Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/include/uapi/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index ff263a800a63..b3ace9c90628 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1195,7 +1195,9 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +/* 5-0x8 available */ +#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -- Gitee From 55af6b262a7f80fc0c069fc24dd92653c7bab908 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 May 2020 15:08:58 +0300 Subject: [PATCH 125/173] perf record: Respect --no-switch-events [Upstream commit 16b4b4e1a0038365a6734a4f50aba77e57865c8e] Context switch events are added automatically by Intel PT and Coresight. Make it possible to suppress them. That is useful for tracing the scheduler without the disturbance that the switch event processing creates. Example: Prerequisites: $ which perf ~/bin/perf $ sudo setcap "cap_sys_rawio,cap_sys_admin,cap_sys_ptrace,cap_syslog,cap_ipc_lock=ep" ~/bin/perf $ sudo chmod +r /proc/kcore Before: $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.938 MB perf.data ] $ perf script -D | grep PERF_RECORD_SWITCH | wc -l 572 After: $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001 Warning: Intel Processor Trace decoding will not be possible except for kernel tracing! [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.838 MB perf.data ] $ perf script -D | grep PERF_RECORD_SWITCH | wc -l 0 $ sudo chmod go-r /proc/kcore $ sudo setcap -r ~/bin/perf Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Link: http://lore.kernel.org/lkml/20200528120859.21604-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/Documentation/perf-record.txt | 4 +++- tools/perf/arch/arm/util/cs-etm.c | 3 ++- tools/perf/arch/x86/util/intel-pt.c | 3 ++- tools/perf/builtin-record.c | 5 +++-- tools/perf/util/record.h | 6 ++++++ 5 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 4aecca8c1610..5e1f7d998173 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -467,7 +467,9 @@ This option sets the time out limit. The default value is 500 ms. --switch-events:: Record context switch events i.e. events of type PERF_RECORD_SWITCH or -PERF_RECORD_SWITCH_CPU_WIDE. +PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. 
Intel PT or CoreSight) +switch events will be enabled automatically, which can be suppressed by +by the option --no-switch-events. --clang-path=PATH:: Path to clang binary to use for compiling BPF scriptlets. diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 6dd617b1196b..bf2931b21212 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -265,7 +265,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; - if (perf_can_record_switch_events()) + if (!record_opts__no_switch_events(opts) && + perf_can_record_switch_events()) opts->record_switch_events = true; evlist__for_each_entry(evlist, evsel) { diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 4b4dffd2b4c4..97835936e3dc 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -707,7 +707,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Per-cpu recording needs sched_switch events to distinguish different * threads. */ - if (have_timing_info && !perf_cpu_map__empty(cpus)) { + if (have_timing_info && !perf_cpu_map__empty(cpus) && + !record_opts__no_switch_events(opts)) { if (perf_can_record_switch_events()) { bool cpu_wide = !target__none(&opts->target) && !target__has_task(&opts->target); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2af9b8685817..ed3c2f88b93d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2189,8 +2189,9 @@ static struct option __record_options[] = { "Record namespaces events"), OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, "Record cgroup events"), - OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, - "Record context switch events"), + OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, + &record.opts.record_switch_events_set, + "Record context switch events"), OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, "Configure all used events to run in kernel space.", PARSE_OPT_EXCLUSIVE), diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index e6a552a7882e..c582598920a1 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -38,6 +38,7 @@ struct record_opts { bool record_namespaces; bool record_cgroup; bool record_switch_events; + bool record_switch_events_set; bool all_kernel; bool all_user; bool kernel_callchains; @@ -76,4 +77,9 @@ extern struct option *record_options; int record__parse_freq(const struct option *opt, const char *str, int unset); +static inline bool record_opts__no_switch_events(const struct record_opts *opts) +{ + return opts->record_switch_events_set && !opts->record_switch_events; +} + #endif // _PERF_RECORD_H -- Gitee From c7742caec1a2b92665fe02b919dfb11295be73e2 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 May 2020 15:08:59 +0300 Subject: [PATCH 126/173] perf intel-pt: Refine kernel decoding only warning message [Upstream commit 9b2d2066ddf6f38c79aa38321544999fa943f0e0] Stop the message displaying when user space is not being traced. Example: Prerequisites: sudo setcap "cap_sys_rawio,cap_sys_admin,cap_sys_ptrace,cap_syslog,cap_ipc_lock=ep" ~/bin/perf sudo chmod +r /proc/kcore Before: $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001 Warning: Intel Processor Trace decoding will not be possible except for kernel tracing! 
[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.838 MB perf.data ] After: $ perf record --no-switch-events --kcore -a -e intel_pt//k -- sleep 0.001 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.068 MB perf.data ] $ sudo chmod go-r /proc/kcore $ sudo setcap -r ~/bin/perf Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Mathieu Poirier Link: http://lore.kernel.org/lkml/20200528120859.21604-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/x86/util/intel-pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 97835936e3dc..11bb0f0d8642 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -803,7 +803,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * per-cpu with no sched_switch (except workload-only). */ if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && - !target__none(&opts->target)) + !target__none(&opts->target) && + !intel_pt_evsel->core.attr.exclude_user) ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); return 0; -- Gitee From bb52d476417511d595133ff84e278ff0f4be068d Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:33 +0800 Subject: [PATCH 127/173] perf arm-spe: Enable timestamp for per-cpu mode [Upstream commit f99237e46432c4581a30b3afc053cef8d3df3ce5] For per-cpu mmap, timestamp tracing should be enabled for Arm SPE; this is helpful for sample correlation. To enable the timestamp automatically, a helper arm_spe_set_timestamp() is introduced for setting the "ts_enable" format bit.
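To illustrate (an example command line written for this note, not taken from the commit): with this change, a per-cpu session such as

    $ perf record -e arm_spe// -a -- sleep 1

now records as if the user had passed

    $ perf record -e arm_spe/ts_enable=1/ -a -- sleep 1

unless "ts_enable" was already given on the command line, in which case the user's setting is kept.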
Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Link: https://lore.kernel.org/r/20210519041546.1574961-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/arm-spe.c | 33 ++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 119d8d7656b9..a11e7aa5a654 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -14,6 +14,7 @@ #include "../../../util/cpumap.h" #include "../../../util/event.h" #include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" #include "../../../util/evlist.h" #include "../../../util/session.h" #include // page_size @@ -32,6 +33,29 @@ struct arm_spe_recording { struct evlist *evlist; }; +static void arm_spe_set_timestamp(struct auxtrace_record *itr, + struct evsel *evsel) +{ + struct arm_spe_recording *ptr; + struct perf_pmu *arm_spe_pmu; + struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG); + u64 user_bits = 0, bit; + + ptr = container_of(itr, struct arm_spe_recording, itr); + arm_spe_pmu = ptr->arm_spe_pmu; + + if (term) + user_bits = term->val.cfg_chg; + + bit = perf_pmu__format_bits(&arm_spe_pmu->format, "ts_enable"); + + /* Skip if user has set it */ + if (bit & user_bits) + return; + + evsel->core.attr.config |= bit; +} + static size_t arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, struct evlist *evlist __maybe_unused) @@ -121,9 +145,14 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, */ perf_evlist__to_front(evlist, arm_spe_evsel); - /* In the case of per-cpu mmaps, sample CPU for AUX event. */ - if (!perf_cpu_map__empty(cpus)) + /* + * In the case of per-cpu mmaps, sample CPU for AUX event; + * also enable the timestamp tracing for samples correlation. + */ + if (!perf_cpu_map__empty(cpus)) { evsel__set_sample_bit(arm_spe_evsel, CPU); + arm_spe_set_timestamp(itr, arm_spe_evsel); + } /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); -- Gitee From 327513f6a4ca711b1d3c3987dc7add78557f0310 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:34 +0800 Subject: [PATCH 128/173] perf arm-spe: Remove redundant checking for "full_auxtrace" [Upstream commit afe360a8c35eb2a9e9ea6314886b5fe465f81fe4] The option "opts->full_auxtrace" is checked earlier; if it is false, the function bails out directly. So remove the redundant check for "opts->full_auxtrace".
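A minimal sketch of the control flow (simplified from arm_spe_recording_options() as it looks after this patch), showing why the removed condition could never be false at that point:

    if (!opts->full_auxtrace)
            return 0;       /* not in full trace mode: bail out here */

    /* ... below this point opts->full_auxtrace is always true ... */
    if (!opts->auxtrace_mmap_pages) {
            /* pick the default mmap sizes */
    }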
Suggested-by: James Clark Signed-off-by: Leo Yan Reviewed-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519041546.1574961-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index a11e7aa5a654..02fe1345b85c 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -116,7 +116,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, return 0; /* We are in full trace mode but '-m,xyz' wasn't specified */ - if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (!opts->auxtrace_mmap_pages) { if (privileged) { opts->auxtrace_mmap_pages = MiB(4) / page_size; } else { -- Gitee From c196ef772912086b04f6cb2537a6e1c1f1c943e2 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:35 +0800 Subject: [PATCH 129/173] perf arm-spe: Save clock parameters from TIME_CONV event [Upstream commit c210c3069636d830bc81f438711f0ad4a1602805] During the recording phase, the "perf record" tool synthesizes the event PERF_RECORD_TIME_CONV for the hardware clock parameters and saves the event into the data file. Afterwards, when processing the data file, the TIME_CONV event is processed very early and stored in the session context. This patch extracts these parameters from the session context and saves them into the structure "spe->tc" of type perf_tsc_conversion, so that the parameters are ready for conversion between clock counter and timestamp. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Dave Martin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519071939.1598923-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 2539d4baec44..d2ae5a5c13ee 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -26,6 +26,7 @@ #include "symbol.h" #include "thread.h" #include "thread-stack.h" +#include "tsc.h" #include "tool.h" #include "util/synthetic-events.h" @@ -45,6 +46,8 @@ struct arm_spe { struct machine *machine; u32 pmu_type; + struct perf_tsc_conversion tc; + u8 timeless_decoding; u8 data_queued; @@ -1006,6 +1009,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; + struct perf_record_time_conv *tc = &session->time_conv; struct arm_spe *spe; int err; @@ -1027,6 +1031,28 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); + + /* + * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead + * and the parameters for hardware clock are stored in the session + * context.
Passes these parameters to the struct perf_tsc_conversion + * in "spe->tc", which is used for later conversion between clock + * counter and timestamp. + * + * For backward compatibility, copies the fields starting from + * "time_cycles" only if they are contained in the event. + */ + spe->tc.time_shift = tc->time_shift; + spe->tc.time_mult = tc->time_mult; + spe->tc.time_zero = tc->time_zero; + + if (event_contains(*tc, time_cycles)) { + spe->tc.time_cycles = tc->time_cycles; + spe->tc.time_mask = tc->time_mask; + spe->tc.cap_user_time_zero = tc->cap_user_time_zero; + spe->tc.cap_user_time_short = tc->cap_user_time_short; + } + spe->auxtrace.process_event = arm_spe_process_event; spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; spe->auxtrace.flush_events = arm_spe_flush; -- Gitee From d1de61d8b9c5b81a85cefd3ffaac8a5197d09a48 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:36 +0800 Subject: [PATCH 130/173] perf arm-spe: Convert event kernel time to counter value [Upstream commit 630519014c7b5abc544d93e07ad6e9328098bf68] When handling a perf event, the Arm SPE decoder needs to decide whether the perf event is earlier or later than the samples from the Arm SPE trace data; to do the comparison, it needs to use the same unit for the time. This patch converts the event's kernel time to the arch timer's counter value, so it can be compared with the counter value contained in the Arm SPE Timestamp packet. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Dave Martin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519071939.1598923-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index d2ae5a5c13ee..ff8b52e6d475 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -669,7 +669,7 @@ static int arm_spe_process_event(struct perf_session *session, } if (sample->time && (sample->time != (u64) -1)) - timestamp = sample->time; + timestamp = perf_time_to_tsc(sample->time, &spe->tc); else timestamp = 0; -- Gitee From 1831c146ceb2a218ec7e4ee335032d0a3d032818 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:37 +0800 Subject: [PATCH 131/173] perf arm-spe: Assign kernel time to synthesized event [Upstream commit 85498f756f015e3ae89dbe0c94480ad76929752a] In the current code, the raw arch timer counter is assigned to the samples synthesized from the Arm SPE trace, so the samples don't contain the kernel time but only the raw counter value. To fix the issue, this patch converts the timer counter to kernel time and assigns it to the sample timestamp.
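Taken together with the previous patch, the conversion is now symmetric; roughly (both lines lifted from the diffs, using the "tsc.h" helpers):

    /* perf event: kernel time -> counter, to order against SPE records */
    timestamp = perf_time_to_tsc(sample->time, &spe->tc);

    /* SPE record: counter -> kernel time, for the synthesized sample */
    sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);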
Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Dave Martin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519071939.1598923-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index ff8b52e6d475..da379328442c 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -234,7 +234,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe, struct arm_spe_record *record = &speq->decoder->record; if (!spe->timeless_decoding) - sample->time = speq->timestamp; + sample->time = tsc_to_perf_time(record->timestamp, &spe->tc); sample->ip = record->from_ip; sample->cpumode = arm_spe_cpumode(spe, sample->ip); -- Gitee From 7616b6210ed0f18a4542d5593c443343c13bebe0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:38 +0800 Subject: [PATCH 132/173] perf arm-spe: Bail out if the trace is later than perf event [Upstream commit afb5e9e47faf53e0f557e22979076dc1a94ef3d7] It's possible that a record in the Arm SPE trace is later than a perf event and vice versa. This requires the perf events and the synthesized Arm SPE events to be correlated and processed in correct time order. To achieve the time ordering, this patch reverses the flow: it first calls arm_spe_sample() and then calls arm_spe_decode(). By comparing the timestamp values, when it detects that the perf event comes earlier than the Arm SPE trace data, it bails out of the decoding loop; the last record is pushed into the auxtrace heap and its sample generation is deferred. To track the timestamp, it updates the timestamp for the latest record every time. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Dave Martin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519071939.1598923-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index da379328442c..5c5b438584c4 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -434,12 +434,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq) static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) { struct arm_spe *spe = speq->spe; + struct arm_spe_record *record; int ret; if (!spe->kernel_start) spe->kernel_start = machine__kernel_start(spe->machine); while (1) { + /* + * The usual logic is firstly to decode the packets, and then + * based the record to synthesize sample; but here the flow is + * reversed: it calls arm_spe_sample() for synthesizing samples + * prior to arm_spe_decode(). + * + * Two reasons for this code logic: + * 1. Firstly, when setup queue in arm_spe__setup_queue(), it + * has decoded trace data and generated a record, but the record + * is left to generate sample until run to here, so it's correct + * to synthesize sample for the left record. + * 2.
After decoding trace data, it needs to compare the record + * timestamp with the coming perf event, if the record timestamp + * is later than the perf event, it needs bail out and pushs the + * record into auxtrace heap, thus the record can be deferred to + * synthesize sample until run to here at the next time; so this + * can correlate samples between Arm SPE trace data and other + * perf events with correct time ordering. + */ + ret = arm_spe_sample(speq); + if (ret) + return ret; + ret = arm_spe_decode(speq->decoder); if (!ret) { pr_debug("No data or all data has been processed.\n"); @@ -453,10 +477,17 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) if (ret < 0) continue; - ret = arm_spe_sample(speq); - if (ret) - return ret; + record = &speq->decoder->record; + /* Update timestamp for the last record */ + if (record->timestamp > speq->timestamp) + speq->timestamp = record->timestamp; + + /* + * If the timestamp of the queue is later than timestamp of the + * coming perf event, bail out so can allow the perf event to + * be processed ahead. + */ if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { *timestamp = speq->timestamp; return 0; -- Gitee From c7cbea54eee0c34f88ebbd3b3ac76ffd352b91b1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 31 Dec 2021 13:32:39 +0800 Subject: [PATCH 133/173] perf arm-spe: Don't wait for PERF_RECORD_EXIT event [Upstream commit 8941ba502f74d72c40feffc1620e1b7b878b052b] When decoding an Arm SPE trace, the tool waits for the PERF_RECORD_EXIT event (the last perf event) before processing the trace data, which is needless and might even cause a logic error, e.g. it might fail to correlate perf events with Arm SPE events correctly. So this patch removes the condition check for the PERF_RECORD_EXIT event.
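After this change, any perf event carrying a timestamp drives the queue processing, not just the final exit event; the dispatch reduces to roughly (simplified from arm_spe_process_event() as modified below):

    } else if (timestamp) {
            err = arm_spe_process_queues(spe, timestamp);
    }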
Reviewed-by: James Clark Reviewed-by: Leo Yan Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211109163009.92072-2-german.gomez@arm.com Tested-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/arm-spe.c | 130 +++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 02fe1345b85c..1e545237391b 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -84,6 +84,55 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, return 0; } +static void +arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts, + bool privileged) +{ + /* + * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor + * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for + * unprivileged users. + * + * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for + * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages + * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the + * user is likely to get an error as they exceed their mlock limmit. + */ + + /* + * No size were given to '-S' or '-m,', so go with the default + */ + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) { + opts->mmap_pages = KiB(256) / page_size; + } + + /* + * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the + * auxtrace mmap area. + */ + if (!opts->auxtrace_snapshot_size) + opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size; + + /* + * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big + * enough to fit the requested snapshot size. + */ + if (!opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_snapshot_size; + + sz = round_up(sz, page_size) / page_size; + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); + } +} + static int arm_spe_recording_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts) @@ -115,6 +164,36 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; + /* + * we are in snapshot mode. + */ + if (opts->auxtrace_snapshot_mode) { + /* + * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with + * default values. + */ + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) + arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged); + + /* + * Snapshot size can't be bigger than the auxtrace area. + */ + if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", + opts->auxtrace_snapshot_size, + opts->auxtrace_mmap_pages * (size_t)page_size); + return -EINVAL; + } + + /* + * Something went wrong somewhere - this shouldn't happen. 
+ */ + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); + return -EINVAL; + } + } + /* We are in full trace mode but '-m,xyz' wasn't specified */ if (!opts->auxtrace_mmap_pages) { if (privileged) { @@ -138,6 +217,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, } } + if (opts->auxtrace_snapshot_mode) + pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME, + opts->auxtrace_snapshot_size); /* * To obtain the auxtrace buffer file descriptor, the auxtrace event @@ -172,6 +254,51 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, return 0; } +static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, + struct record_opts *opts, + const char *str) +{ + unsigned long long snapshot_size = 0; + char *endptr; + + if (str) { + snapshot_size = strtoull(str, &endptr, 0); + if (*endptr || snapshot_size > SIZE_MAX) + return -1; + } + + opts->auxtrace_snapshot_mode = true; + opts->auxtrace_snapshot_size = snapshot_size; + + return 0; +} + +static int arm_spe_snapshot_start(struct auxtrace_record *itr) +{ + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) + return evsel__disable(evsel); + } + return -EINVAL; +} + +static int arm_spe_snapshot_finish(struct auxtrace_record *itr) +{ + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + struct evsel *evsel; + + evlist__for_each_entry(ptr->evlist, evsel) { + if (evsel->core.attr.type == ptr->arm_spe_pmu->type) + return evsel__enable(evsel); + } + return -EINVAL; +} + static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused) { struct timespec ts; @@ -207,6 +334,9 @@ struct auxtrace_record *arm_spe_recording_init(int *err, sper->arm_spe_pmu = arm_spe_pmu; sper->itr.pmu = arm_spe_pmu; + sper->itr.snapshot_start = arm_spe_snapshot_start; + sper->itr.snapshot_finish = arm_spe_snapshot_finish; + sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; sper->itr.info_fill = arm_spe_info_fill; -- Gitee From 3832692cc5c04b13569470d4093ade20f1eed0df Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 31 Dec 2021 13:32:41 +0800 Subject: [PATCH 135/173] perf arm-spe: Snapshot mode test [Upstream commit 6b1b208bef5b9a53a591f13df98f72435b69c3e8] Shell script test_arm_spe.sh has been added to test the recording of SPE tracing events in snapshot mode. 
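The script follows the usual snapshot workflow; condensed from the test body added below:

    $ perf record -e arm_spe// -S -- dd if=/dev/zero of=/dev/null &
    $ kill -USR2 $!     # signal perf to snapshot the AUX buffer
    $ kill $!           # stop recording

and then greps the 'perf script' and 'perf report' output for synthesized SPE samples.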
Reviewed-by: Leo Yan Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211109163009.92072-4-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/tests/shell/test_arm_spe.sh | 89 ++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100755 tools/perf/tests/shell/test_arm_spe.sh diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh new file mode 100755 index 000000000000..e59044edc406 --- /dev/null +++ b/tools/perf/tests/shell/test_arm_spe.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# Check Arm SPE trace data recording and synthesized samples + +# Uses the 'perf record' to record trace data of Arm SPE events; +# then verify if any SPE event samples are generated by SPE with +# 'perf script' and 'perf report' commands. + +# SPDX-License-Identifier: GPL-2.0 +# German Gomez , 2021 + +skip_if_no_arm_spe_event() { + perf list | egrep -q 'arm_spe_[0-9]+//' && return 0 + + # arm_spe event doesn't exist + return 2 +} + +skip_if_no_arm_spe_event || exit 2 + +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +glb_err=0 + +cleanup_files() +{ + rm -f ${perfdata} + exit $glb_err +} + +trap cleanup_files exit term int + +arm_spe_report() { + if [ $2 != 0 ]; then + echo "$1: FAIL" + glb_err=$2 + else + echo "$1: PASS" + fi +} + +perf_script_samples() { + echo "Looking at perf.data file for dumping samples:" + + # from arm-spe.c/arm_spe_synth_events() + events="(ld1-miss|ld1-access|llc-miss|lld-access|tlb-miss|tlb-access|branch-miss|remote-access|memory)" + + # Below is an example of the samples dumping: + # dd 3048 [002] 1 l1d-access: ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so) + # dd 3048 [002] 1 tlb-access: ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so) + # dd 3048 [002] 1 memory: ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so) + perf script -F,-time -i ${perfdata} 2>&1 | \ + egrep " +$1 +[0-9]+ .* +${events}:(.*:)? +" > /dev/null 2>&1 +} + +perf_report_samples() { + echo "Looking at perf.data file for reporting samples:" + + # Below is an example of the samples reporting: + # 73.04% 73.04% dd libc-2.27.so [.] _dl_addr + # 7.71% 7.71% dd libc-2.27.so [.] getenv + # 2.59% 2.59% dd ld-2.27.so [.] strcmp + perf report --stdio -i ${perfdata} 2>&1 | \ + egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1 +} + +arm_spe_snapshot_test() { + echo "Recording trace with snapshot mode $perfdata" + perf record -o ${perfdata} -e arm_spe// -S \ + -- dd if=/dev/zero of=/dev/null > /dev/null 2>&1 & + PERFPID=$! + + # Wait for perf program + sleep 1 + + # Send signal to snapshot trace data + kill -USR2 $PERFPID + + # Stop perf program + kill $PERFPID + wait $PERFPID + + perf_script_samples dd && + perf_report_samples dd + + err=$? 
+ arm_spe_report "SPE snapshot testing" $err +} + +arm_spe_snapshot_test +exit $glb_err -- Gitee From 429877da99f9637001ebd28633dd8473e28f5e94 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 31 Dec 2021 13:32:42 +0800 Subject: [PATCH 136/173] perf arm-spe: Implement find_snapshot callback [Upstream commit 56c31cdff7c2a640f4afcfe0ac4e4ca6dc47c5fd] The head pointer of the AUX buffer managed by the arm_spe_pmu.c driver is not monotonically increasing, therefore the find_snapshot callback is needed in order to find the trace data within the AUX buffer and avoid wasting space in the perf.data file. The pointer is assumed to have wrapped if the buffer contains non-zero data at the end. If it has wrapped, the entire contents of the AUX buffer are stored in the perf.data file. Otherwise only the data up to the head pointer is stored. Reviewed-by: James Clark Reviewed-by: Leo Yan Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211109163009.92072-3-german.gomez@arm.com Tested-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/arm-spe.c | 145 +++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 1e545237391b..23e72d50565f 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -23,6 +23,7 @@ #include "../../../util/auxtrace.h" #include "../../../util/record.h" #include "../../../util/arm-spe.h" +#include // reallocarray #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -31,6 +32,8 @@ struct arm_spe_recording { struct auxtrace_record itr; struct perf_pmu *arm_spe_pmu; struct evlist *evlist; + int wrapped_cnt; + bool *wrapped; }; static void arm_spe_set_timestamp(struct auxtrace_record *itr, @@ -299,6 +302,146 @@ static int arm_spe_snapshot_finish(struct auxtrace_record *itr) return -EINVAL; } +static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx) +{ + bool *wrapped; + int cnt = ptr->wrapped_cnt, new_cnt, i; + + /* + * No need to allocate, so return early. + */ + if (idx < cnt) + return 0; + + /* + * Make ptr->wrapped as big as idx. + */ + new_cnt = idx + 1; + + /* + * Free'ed in arm_spe_recording_free(). + */ + wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool)); + if (!wrapped) + return -ENOMEM; + + /* + * init new allocated values. + */ + for (i = cnt; i < new_cnt; i++) + wrapped[i] = false; + + ptr->wrapped_cnt = new_cnt; + ptr->wrapped = wrapped; + + return 0; +} + +static bool arm_spe_buffer_has_wrapped(unsigned char *buffer, + size_t buffer_size, u64 head) +{ + u64 i, watermark; + u64 *buf = (u64 *)buffer; + size_t buf_size = buffer_size; + + /* + * Defensively handle the case where head might be continually increasing - if its value is + * equal or greater than the size of the ring buffer, then we can safely determine it has + * wrapped around. Otherwise, continue to detect if head might have wrapped. + */ + if (head >= buffer_size) + return true; + + /* + * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer. + */ + watermark = buf_size - 512; + + /* + * The value of head is somewhere within the size of the ring buffer. 
This can be that there + * hasn't been enough data to fill the ring buffer yet or the trace time was so long that + * head has numerically wrapped around. To find we need to check if we have data at the + * very end of the ring buffer. We can reliably do this because mmap'ed pages are zeroed + * out and there is a fresh mapping with every new session. + */ + + /* + * head is less than 512 byte from the end of the ring buffer. + */ + if (head > watermark) + watermark = head; + + /* + * Speed things up by using 64 bit transactions (see "u64 *buf" above) + */ + watermark /= sizeof(u64); + buf_size /= sizeof(u64); + + /* + * If we find trace data at the end of the ring buffer, head has been there and has + * numerically wrapped around at least once. + */ + for (i = watermark; i < buf_size; i++) + if (buf[i]) + return true; + + return false; +} + +static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old) +{ + int err; + bool wrapped; + struct arm_spe_recording *ptr = + container_of(itr, struct arm_spe_recording, itr); + + /* + * Allocate memory to keep track of wrapping if this is the first + * time we deal with this *mm. + */ + if (idx >= ptr->wrapped_cnt) { + err = arm_spe_alloc_wrapped_array(ptr, idx); + if (err) + return err; + } + + /* + * Check to see if *head has wrapped around. If it hasn't only the + * amount of data between *head and *old is snapshot'ed to avoid + * bloating the perf.data file with zeros. But as soon as *head has + * wrapped around the entire size of the AUX ring buffer it taken. + */ + wrapped = ptr->wrapped[idx]; + if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) { + wrapped = true; + ptr->wrapped[idx] = true; + } + + pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", + __func__, idx, (size_t)*old, (size_t)*head, mm->len); + + /* + * No wrap has occurred, we can just use *head and *old. + */ + if (!wrapped) + return 0; + + /* + * *head has wrapped around - adjust *head and *old to pickup the + * entire content of the AUX buffer. + */ + if (*head >= mm->len) { + *old = *head - mm->len; + } else { + *head += mm->len; + *old = *head - mm->len; + } + + return 0; +} + static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused) { struct timespec ts; @@ -313,6 +456,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr) struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); + free(sper->wrapped); free(sper); } @@ -336,6 +480,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err, sper->itr.pmu = arm_spe_pmu; sper->itr.snapshot_start = arm_spe_snapshot_start; sper->itr.snapshot_finish = arm_spe_snapshot_finish; + sper->itr.find_snapshot = arm_spe_find_snapshot; sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; -- Gitee From cbbd936dbb6445dd604124a917fdf5ac6ac7e843 Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Fri, 31 Dec 2021 13:32:43 +0800 Subject: [PATCH 137/173] perf arm-spe: Print size using consistent format [Upstream commit 09e9afac8cea99429b103fc21836dae693a56b17] Since the size is already printed earlier in hex, print the same data using the same format, in hex. 
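For example, a 4096-byte buffer is now reported as (illustrative output, matching the size already printed earlier in hex):

    . ... ARM SPE data: size 0x1000 bytes

instead of "size 4096 bytes".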
Reviewed-by: James Clark Reviewed-by: Leo Yan Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211109142153.56546-3-german.gomez@arm.com Signed-off-by: German Gomez Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 58b7069c5a5f..2196291976d9 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -100,7 +100,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, const char *color = PERF_COLOR_BLUE; color_fprintf(stdout, color, - ". ... ARM SPE data: size %zu bytes\n", + ". ... ARM SPE data: size %#zx bytes\n", len); while (len) { -- Gitee From f917106c66f50daaf0e5407c69876c1ff730e374 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 31 Dec 2021 13:32:44 +0800 Subject: [PATCH 138/173] perf arm-spe: Track task context switch for cpu-mode events [Upstream commit 9dc9855f18ba25d2bc536ea5ba6682855e385d66] When perf report synthesizes events from ARM SPE data, it refers to the current cpu, pid and tid in the machine. But there's no place to set them in the ARM SPE decoder. I'm seeing all pid/tid values set to -1 and user symbols not resolved in the output. # perf record -a -e arm_spe_0/ts_enable=1/ sleep 1 # perf report -q | head 8.77% 8.77% :-1 [kernel.kallsyms] [k] format_decode 7.02% 7.02% :-1 [kernel.kallsyms] [k] seq_printf 7.02% 7.02% :-1 [unknown] [.] 0x0000ffff9f687c34 5.26% 5.26% :-1 [kernel.kallsyms] [k] vsnprintf 3.51% 3.51% :-1 [kernel.kallsyms] [k] string 3.51% 3.51% :-1 [unknown] [.] 0x0000ffff9f66ae20 3.51% 3.51% :-1 [unknown] [.] 0x0000ffff9f670b3c 3.51% 3.51% :-1 [unknown] [.] 0x0000ffff9f67c040 1.75% 1.75% :-1 [kernel.kallsyms] [k] ___cache_free 1.75% 1.75% :-1 [kernel.kallsyms] [k] __count_memcg_events Like Intel PT, add context switch records to track task info. As ARM SPE support was added later than PERF_RECORD_SWITCH_CPU_WIDE, I think we can safely set the attr.context_switch bit and use it.
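A condensed sketch of the two halves of the change below (simplified from the diffs): the recording side requests switch records from the kernel, and the decode side feeds them into the machine state:

    /* record side: enable switch records on the tracking event */
    tracking_evsel->core.attr.context_switch = 1;

    /* report side: on a PERF_RECORD_SWITCH{,_CPU_WIDE} switch-out,
     * the next_prev_* fields identify the task being switched in */
    pid = event->context_switch.next_prev_pid;
    tid = event->context_switch.next_prev_tid;
    machine__set_current_tid(spe->machine, sample->cpu, pid, tid);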
Reviewed-by: Leo Yan Signed-off-by: German Gomez Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211111133625.193568-2-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/arch/arm64/util/arm-spe.c | 6 +++++- tools/perf/util/arm-spe.c | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 23e72d50565f..2bbfd5625b27 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -251,8 +251,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, CPU); + /* also track task context switch */ + tracking_evsel->core.attr.context_switch = 1; + } return 0; } diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 2196291976d9..9e3a6c54801d 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -681,6 +681,25 @@ static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, return 0; } +static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, + struct perf_sample *sample) +{ + pid_t pid, tid; + int cpu; + + if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) + return 0; + + pid = event->context_switch.next_prev_pid; + tid = event->context_switch.next_prev_tid; + cpu = sample->cpu; + + if (tid == -1) + pr_warning("context_switch event has no tid\n"); + + return machine__set_current_tid(spe->machine, cpu, pid, tid); +} + static int arm_spe_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -718,6 +737,12 @@ static int arm_spe_process_event(struct perf_session *session, } } else if (timestamp) { err = arm_spe_process_queues(spe, timestamp); + if (err) + return err; + + if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || + event->header.type == PERF_RECORD_SWITCH) + err = arm_spe_context_switch(spe, event, sample); } return err; -- Gitee From 8a01bdd310a62706b42b62503552e63a546f3ffc Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 31 Dec 2021 13:32:45 +0800 Subject: [PATCH 139/173] perf arm-spe: Update --switch-events docs in 'perf record' [Upstream commit 455c988225c7eee84c7a2f86984404825a1830bb] Update 'perf record' docs and ARM SPE recording options so that they are consistent. This includes supporting the --no-switch-events flag in ARM SPE as well. 
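For example, to suppress the automatically enabled switch events in an SPE session (an illustrative command line, following the documentation change below):

    $ perf record -e arm_spe// --no-switch-events -a -- sleep 1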
Reviewed-by: Leo Yan Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211111133625.193568-3-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/arch/arm64/util/arm-spe.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5e1f7d998173..0535927eb642 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -467,7 +467,7 @@ This option sets the time out limit. The default value is 500 ms. --switch-events:: Record context switch events i.e. events of type PERF_RECORD_SWITCH or -PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight) +PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE) switch events will be enabled automatically, which can be suppressed by by the option --no-switch-events. diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2bbfd5625b27..f3aa09647606 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -254,8 +254,10 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, if (!perf_cpu_map__empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); evsel__set_sample_bit(tracking_evsel, CPU); + /* also track task context switch */ - tracking_evsel->core.attr.context_switch = 1; + if (!record_opts__no_switch_events(opts)) + tracking_evsel->core.attr.context_switch = 1; } return 0; -- Gitee From d21727dc24fe51cf19dfe5d3dcc1d08f8ff3e7a6 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 31 Dec 2021 13:32:46 +0800 Subject: [PATCH 140/173] perf arm-spe: Save context ID in record [Upstream commit 169de64f5dc22d9984d45c1f119fb644fa16d64a] This patch is to save context ID in record, this will be used to set TID for samples. 
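The decoder change amounts to (condensed from the diff below): default the field to an invalid value, then fill it from the payload when a CONTEXT packet is decoded:

    decoder->record.context_id = (u64)-1;   /* no CONTEXT packet seen */
    ...
    case ARM_SPE_CONTEXT:
            decoder->record.context_id = payload;
            break;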
Reviewed-by: Leo Yan Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211111133625.193568-4-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 2 ++ tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 1 + 2 files changed, 3 insertions(+) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 32fe41835fa6..3fc528c9270c 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -151,6 +151,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) u64 payload, ip; memset(&decoder->record, 0x0, sizeof(decoder->record)); + decoder->record.context_id = (u64)-1; while (1) { err = arm_spe_get_next_packet(decoder); @@ -180,6 +181,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) case ARM_SPE_COUNTER: break; case ARM_SPE_CONTEXT: + decoder->record.context_id = payload; break; case ARM_SPE_OP_TYPE: if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 59bdb7309674..46a8556a9e95 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -38,6 +38,7 @@ struct arm_spe_record { u64 timestamp; u64 virt_addr; u64 phys_addr; + u64 context_id; }; struct arm_spe_insn; -- Gitee From 991a4763fbae559d17a0e17e066afcdc04b9750c Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 31 Dec 2021 13:32:47 +0800 Subject: [PATCH 141/173] perf arm-spe: Support hardware-based PID tracing [Upstream commit 27d113cfe892867885ab1d75abe5f42c228ef8a9] If the ARM SPE traces contain CONTEXT packets with TID info, use these values for tracking the TID of samples. Otherwise fall back to using context switch events and display a warning message to the user about possible timing inaccuracies [1].
[1] https://lore.kernel.org/lkml/f877cfa6-9b25-6445-3806-ca44a4042eaf@arm.com/ Signed-off-by: German Gomez Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211111133625.193568-5-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 99 +++++++++++++++++++++++++++------------ 1 file changed, 70 insertions(+), 29 deletions(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 9e3a6c54801d..4748bcfe61de 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -71,6 +71,7 @@ struct arm_spe { u64 kernel_start; unsigned long num_events; + u8 use_ctx_pkt_for_pid; }; struct arm_spe_queue { @@ -226,6 +227,44 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) PERF_RECORD_MISC_USER; } +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, + struct auxtrace_queue *queue) +{ + struct arm_spe_queue *speq = queue->priv; + pid_t tid; + + tid = machine__get_current_tid(spe->machine, speq->cpu); + if (tid != -1) { + speq->tid = tid; + thread__zput(speq->thread); + } else + speq->tid = queue->tid; + + if ((!speq->thread) && (speq->tid != -1)) { + speq->thread = machine__find_thread(spe->machine, -1, + speq->tid); + } + + if (speq->thread) { + speq->pid = speq->thread->pid_; + if (queue->cpu == -1) + speq->cpu = speq->thread->cpu; + } +} + +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) +{ + struct arm_spe *spe = speq->spe; + int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); + + if (err) + return err; + + arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); + + return 0; +} + static void arm_spe_prep_sample(struct arm_spe *spe, struct arm_spe_queue *speq, union perf_event *event, @@ -460,6 +499,19 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) * can correlate samples between Arm SPE trace data and other * perf events with correct time ordering. */ + + /* + * Update pid/tid info. + */ + record = &speq->decoder->record; + if (!spe->timeless_decoding && record->context_id != (u64)-1) { + ret = arm_spe_set_tid(speq, record->context_id); + if (ret) + return ret; + + spe->use_ctx_pkt_for_pid = true; + } + ret = arm_spe_sample(speq); if (ret) return ret; @@ -586,31 +638,6 @@ static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) return timeless_decoding; } -static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, - struct auxtrace_queue *queue) -{ - struct arm_spe_queue *speq = queue->priv; - pid_t tid; - - tid = machine__get_current_tid(spe->machine, speq->cpu); - if (tid != -1) { - speq->tid = tid; - thread__zput(speq->thread); - } else - speq->tid = queue->tid; - - if ((!speq->thread) && (speq->tid != -1)) { - speq->thread = machine__find_thread(spe->machine, -1, - speq->tid); - } - - if (speq->thread) { - speq->pid = speq->thread->pid_; - if (queue->cpu == -1) - speq->cpu = speq->thread->cpu; - } -} - static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) { unsigned int queue_nr; @@ -641,7 +668,12 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) ts = timestamp; } - arm_spe_set_pid_tid_cpu(spe, queue); + /* + * A previous context-switch event has set pid/tid in the machine's context, so + * here we need to update the pid/tid in the thread and SPE queue. 
+ */ + if (!spe->use_ctx_pkt_for_pid) + arm_spe_set_pid_tid_cpu(spe, queue); ret = arm_spe_run_decoder(speq, &ts); if (ret < 0) { @@ -740,8 +772,9 @@ static int arm_spe_process_event(struct perf_session *session, if (err) return err; - if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || - event->header.type == PERF_RECORD_SWITCH) + if (!spe->use_ctx_pkt_for_pid && + (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || + event->header.type == PERF_RECORD_SWITCH)) err = arm_spe_context_switch(spe, event, sample); } @@ -808,7 +841,15 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return arm_spe_process_timeless_queues(spe, -1, MAX_TIMESTAMP - 1); - return arm_spe_process_queues(spe, MAX_TIMESTAMP); + ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); + if (ret) + return ret; + + if (!spe->use_ctx_pkt_for_pid) + ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" + "Matching of TIDs to SPE events could be inaccurate.\n"); + + return 0; } static void arm_spe_free_queue(void *priv) -- Gitee From c0c6e17b691a725418d0801c41c3268bae826cc4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 31 Dec 2021 13:32:48 +0800 Subject: [PATCH 142/173] perf tools: Set COMPAT_NEED_REALLOCARRAY for CONFIG_AUXTRACE=1 [Upstream commit 70f9c9b2df1dd12cf40862b2b31c7bf89e311066] As it is being used in tools/perf/arch/arm64/util/arm-spe.c and the COMPAT_NEED_REALLOCARRAY was only being set when CORESIGHT=1 is set. Fixes: 56c31cdff7c2a640 ("perf arm-spe: Implement find_snapshot callback") Cc: Alexander Shishkin Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Link: https://lore.kernel.org/all/YZT63mIc7iY01er3@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/Makefile.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d606e224b932..d033f665731b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -930,6 +930,9 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif endif endif -- Gitee From 3bd036c47fcadd279496706416a9e3f08a9e51cd Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 31 Dec 2021 13:32:49 +0800 Subject: [PATCH 143/173] perf inject: Fix ARM SPE handling [Upstream commit 9e1a8d9f683260d50e0a14176d3f7c46a93b2700] 'perf inject' is currently not working for Arm SPE. When you try to run 'perf inject' and 'perf report' with a perf.data file that contains SPE traces, the tool reports a "Bad address" error: # ./perf record -e arm_spe_0/ts_enable=1,store_filter=1,branch_filter=1,load_filter=1/ -a -- sleep 1 # ./perf inject -i perf.data -o perf.inject.data --itrace # ./perf report -i perf.inject.data --stdio 0x42c00 [0x8]: failed to process type: 9 [Bad address] Error: failed to process sample As far as I know, the issue was first spotted in [1], but 'perf inject' was not yet injecting the samples. This patch does something similar to what cs_etm does for injecting the samples [2], but for SPE. 
[1] https://patchwork.kernel.org/project/linux-arm-kernel/cover/20210412091006.468557-1-leo.yan@linaro.org/#24117339 [2] https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/tree/tools/perf/util/cs-etm.c?h=perf/core&id=133fe2e617e48ca0948983329f43877064ffda3e#n1196 Reviewed-by: James Clark Signed-off-by: German Gomez Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211105104130.28186-2-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 4748bcfe61de..fccac06b573a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -51,6 +51,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -287,6 +288,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -295,6 +302,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", ret); @@ -986,6 +999,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; -- Gitee From b52e1f6c0e7189c1447cfdde649000fdc607c749 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Thu, 7 Oct 2021 23:34:26 +0000 Subject: [PATCH 144/173] tools: arm64: Import sysreg.h [Upstream commit 272a067df3c89f6f2176a350f88661625a2c8b3b] Bring in the kernel's arch/arm64/include/asm/sysreg.h into tools/ for arm64 to make use of all the standard register definitions, for consistency with the kernel. Make use of the register read/write definitions from sysreg.h, instead of the existing definitions. A syntax correction is needed for the files that use write_sysreg() to make it compliant with the new (kernel's) syntax.
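The syntax correction refers to the argument order of write_sysreg(): the kernel's macro takes the value first and the register name second. An illustrative before/after (the register and value names are made up for this example; the old tools-local helper is assumed to have taken the register first):

    write_sysreg(vbar_el1, vectors);   /* old tools/ order: reg, value */
    write_sysreg(vectors, vbar_el1);   /* kernel order: value, reg */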
Reviewed-by: Andrew Jones Reviewed-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta [maz: squashed two commits in order to keep the series bisectable] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211007233439.1826892-3-rananta@google.com Link: https://lore.kernel.org/r/20211007233439.1826892-4-rananta@google.com Signed-off-by: huwentao --- tools/arch/arm64/include/asm/sysreg.h | 1296 +++++++++++++++++ .../selftests/kvm/include/aarch64/processor.h | 1 + 2 files changed, 1297 insertions(+) create mode 100644 tools/arch/arm64/include/asm/sysreg.h diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h new file mode 100644 index 000000000000..7640fa27be94 --- /dev/null +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -0,0 +1,1296 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Macros for accessing system registers with older binutils. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Catalin Marinas + */ + +#ifndef __ASM_SYSREG_H +#define __ASM_SYSREG_H + +#include +#include + +/* + * ARMv8 ARM reserves the following encoding for system registers: + * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", + * C5.2, version:ARM DDI 0487A.f) + * [20-19] : Op0 + * [18-16] : Op1 + * [15-12] : CRn + * [11-8] : CRm + * [7-5] : Op2 + */ +#define Op0_shift 19 +#define Op0_mask 0x3 +#define Op1_shift 16 +#define Op1_mask 0x7 +#define CRn_shift 12 +#define CRn_mask 0xf +#define CRm_shift 8 +#define CRm_mask 0xf +#define Op2_shift 5 +#define Op2_mask 0x7 + +#define sys_reg(op0, op1, crn, crm, op2) \ + (((op0) << Op0_shift) | ((op1) << Op1_shift) | \ + ((crn) << CRn_shift) | ((crm) << CRm_shift) | \ + ((op2) << Op2_shift)) + +#define sys_insn sys_reg + +#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask) +#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask) +#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask) +#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask) +#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask) + +#ifndef CONFIG_BROKEN_GAS_INST + +#ifdef __ASSEMBLY__ +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. +#define __emit_inst(x) .inst(x) +#else +#define __emit_inst(x) ".inst " __stringify((x)) "\n\t" +#endif + +#else /* CONFIG_BROKEN_GAS_INST */ + +#ifndef CONFIG_CPU_BIG_ENDIAN +#define __INSTR_BSWAP(x) (x) +#else /* CONFIG_CPU_BIG_ENDIAN */ +#define __INSTR_BSWAP(x) ((((x) << 24) & 0xff000000) | \ + (((x) << 8) & 0x00ff0000) | \ + (((x) >> 8) & 0x0000ff00) | \ + (((x) >> 24) & 0x000000ff)) +#endif /* CONFIG_CPU_BIG_ENDIAN */ + +#ifdef __ASSEMBLY__ +#define __emit_inst(x) .long __INSTR_BSWAP(x) +#else /* __ASSEMBLY__ */ +#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t" +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_BROKEN_GAS_INST */ + +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. 
+ * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift + +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) +#define PSTATE_TCO pstate_field(3, 4) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) + +#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x)) +#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x)) +#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) + +#define __SYS_BARRIER_INSN(CRm, op2, Rt) \ + __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) + +#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) + +#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) + +/* + * System registers, organised loosely by encoding but grouped together + * where the architected name contains an index. e.g. ID_MMFR_EL1. + */ +#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) +#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) +#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) +#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) +#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) +#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) +#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) +#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) +#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) +#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) +#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) +#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) +#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) +#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) +#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6) +#define SYS_DBGCLAIMCLR_EL1 sys_reg(2, 0, 7, 9, 6) +#define SYS_DBGAUTHSTATUS_EL1 sys_reg(2, 0, 7, 14, 6) +#define SYS_MDCCSR_EL0 sys_reg(2, 3, 0, 1, 0) +#define SYS_DBGDTR_EL0 sys_reg(2, 3, 0, 4, 0) +#define SYS_DBGDTRRX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0) +#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) + +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) +#define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) 
+#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) +#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64AFR0_EL1 sys_reg(3, 0, 0, 5, 4) +#define SYS_ID_AA64AFR1_EL1 sys_reg(3, 0, 0, 5, 5) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) + +#define SYS_SCTLR_EL1 sys_reg(3, 0, 1, 0, 0) +#define SYS_ACTLR_EL1 sys_reg(3, 0, 1, 0, 1) +#define SYS_CPACR_EL1 sys_reg(3, 0, 1, 0, 2) +#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5) +#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6) + +#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0) +#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1) + +#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0) +#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1) +#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2) + +#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0) +#define SYS_APIAKEYHI_EL1 sys_reg(3, 0, 2, 1, 1) +#define SYS_APIBKEYLO_EL1 sys_reg(3, 0, 2, 1, 2) +#define SYS_APIBKEYHI_EL1 sys_reg(3, 0, 2, 1, 3) + +#define SYS_APDAKEYLO_EL1 sys_reg(3, 0, 2, 2, 0) +#define SYS_APDAKEYHI_EL1 sys_reg(3, 0, 2, 2, 1) +#define SYS_APDBKEYLO_EL1 sys_reg(3, 0, 2, 2, 2) +#define SYS_APDBKEYHI_EL1 sys_reg(3, 0, 2, 2, 3) + +#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0) +#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1) + +#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) +#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1) + +#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) + +#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) +#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1) +#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0) + +#define SYS_ERRIDR_EL1 sys_reg(3, 0, 5, 3, 0) +#define SYS_ERRSELR_EL1 sys_reg(3, 0, 5, 3, 1) +#define SYS_ERXFR_EL1 sys_reg(3, 0, 5, 4, 0) +#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1) +#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2) +#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3) +#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0) +#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1) +#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0) +#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1) + +#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) +#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) + +#define SYS_PAR_EL1_F BIT(0) +#define SYS_PAR_EL1_FST GENMASK(6, 1) + +/*** Statistical Profiling Extension ***/ +/* ID registers */ +#define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7) +#define SYS_PMSIDR_EL1_FE_SHIFT 0 +#define SYS_PMSIDR_EL1_FT_SHIFT 1 +#define SYS_PMSIDR_EL1_FL_SHIFT 2 +#define SYS_PMSIDR_EL1_ARCHINST_SHIFT 3 +#define SYS_PMSIDR_EL1_LDS_SHIFT 4 +#define SYS_PMSIDR_EL1_ERND_SHIFT 5 +#define SYS_PMSIDR_EL1_INTERVAL_SHIFT 8 +#define SYS_PMSIDR_EL1_INTERVAL_MASK 0xfUL +#define SYS_PMSIDR_EL1_MAXSIZE_SHIFT 12 +#define SYS_PMSIDR_EL1_MAXSIZE_MASK 0xfUL +#define SYS_PMSIDR_EL1_COUNTSIZE_SHIFT 16 +#define SYS_PMSIDR_EL1_COUNTSIZE_MASK 0xfUL + +#define SYS_PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7) +#define SYS_PMBIDR_EL1_ALIGN_SHIFT 0 +#define SYS_PMBIDR_EL1_ALIGN_MASK 0xfU +#define SYS_PMBIDR_EL1_P_SHIFT 4 +#define SYS_PMBIDR_EL1_F_SHIFT 5 + +/* Sampling controls */ +#define SYS_PMSCR_EL1 sys_reg(3, 0, 9, 9, 0) +#define SYS_PMSCR_EL1_E0SPE_SHIFT 0 +#define SYS_PMSCR_EL1_E1SPE_SHIFT 1 +#define SYS_PMSCR_EL1_CX_SHIFT 3 +#define 
SYS_PMSCR_EL1_PA_SHIFT 4 +#define SYS_PMSCR_EL1_TS_SHIFT 5 +#define SYS_PMSCR_EL1_PCT_SHIFT 6 + +#define SYS_PMSCR_EL2 sys_reg(3, 4, 9, 9, 0) +#define SYS_PMSCR_EL2_E0HSPE_SHIFT 0 +#define SYS_PMSCR_EL2_E2SPE_SHIFT 1 +#define SYS_PMSCR_EL2_CX_SHIFT 3 +#define SYS_PMSCR_EL2_PA_SHIFT 4 +#define SYS_PMSCR_EL2_TS_SHIFT 5 +#define SYS_PMSCR_EL2_PCT_SHIFT 6 + +#define SYS_PMSICR_EL1 sys_reg(3, 0, 9, 9, 2) + +#define SYS_PMSIRR_EL1 sys_reg(3, 0, 9, 9, 3) +#define SYS_PMSIRR_EL1_RND_SHIFT 0 +#define SYS_PMSIRR_EL1_INTERVAL_SHIFT 8 +#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL + +/* Filtering controls */ +#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1) + +#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4) +#define SYS_PMSFCR_EL1_FE_SHIFT 0 +#define SYS_PMSFCR_EL1_FT_SHIFT 1 +#define SYS_PMSFCR_EL1_FL_SHIFT 2 +#define SYS_PMSFCR_EL1_B_SHIFT 16 +#define SYS_PMSFCR_EL1_LD_SHIFT 17 +#define SYS_PMSFCR_EL1_ST_SHIFT 18 + +#define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5) +#define SYS_PMSEVFR_EL1_RES0_8_2 \ + (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\ + BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0)) +#define SYS_PMSEVFR_EL1_RES0_8_3 \ + (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) + +#define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6) +#define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0 + +/* Buffer controls */ +#define SYS_PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0) +#define SYS_PMBLIMITR_EL1_E_SHIFT 0 +#define SYS_PMBLIMITR_EL1_FM_SHIFT 1 +#define SYS_PMBLIMITR_EL1_FM_MASK 0x3UL +#define SYS_PMBLIMITR_EL1_FM_STOP_IRQ (0 << SYS_PMBLIMITR_EL1_FM_SHIFT) + +#define SYS_PMBPTR_EL1 sys_reg(3, 0, 9, 10, 1) + +/* Buffer error reporting */ +#define SYS_PMBSR_EL1 sys_reg(3, 0, 9, 10, 3) +#define SYS_PMBSR_EL1_COLL_SHIFT 16 +#define SYS_PMBSR_EL1_S_SHIFT 17 +#define SYS_PMBSR_EL1_EA_SHIFT 18 +#define SYS_PMBSR_EL1_DL_SHIFT 19 +#define SYS_PMBSR_EL1_EC_SHIFT 26 +#define SYS_PMBSR_EL1_EC_MASK 0x3fUL + +#define SYS_PMBSR_EL1_EC_BUF (0x0UL << SYS_PMBSR_EL1_EC_SHIFT) +#define SYS_PMBSR_EL1_EC_FAULT_S1 (0x24UL << SYS_PMBSR_EL1_EC_SHIFT) +#define SYS_PMBSR_EL1_EC_FAULT_S2 (0x25UL << SYS_PMBSR_EL1_EC_SHIFT) + +#define SYS_PMBSR_EL1_FAULT_FSC_SHIFT 0 +#define SYS_PMBSR_EL1_FAULT_FSC_MASK 0x3fUL + +#define SYS_PMBSR_EL1_BUF_BSC_SHIFT 0 +#define SYS_PMBSR_EL1_BUF_BSC_MASK 0x3fUL + +#define SYS_PMBSR_EL1_BUF_BSC_FULL (0x1UL << SYS_PMBSR_EL1_BUF_BSC_SHIFT) + +/*** End of Statistical Profiling Extension ***/ + +/* + * TRBE Registers + */ +#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) +#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) +#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) +#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3) +#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4) +#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) +#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) + +#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) +#define TRBLIMITR_LIMIT_SHIFT 12 +#define TRBLIMITR_NVM BIT(5) +#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) +#define TRBLIMITR_TRIG_MODE_SHIFT 3 +#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) +#define TRBLIMITR_FILL_MODE_SHIFT 1 +#define TRBLIMITR_ENABLE BIT(0) +#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) +#define TRBPTR_PTR_SHIFT 0 +#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) +#define TRBBASER_BASE_SHIFT 12 +#define TRBSR_EC_MASK GENMASK(5, 0) +#define TRBSR_EC_SHIFT 26 +#define TRBSR_IRQ BIT(22) +#define TRBSR_TRG BIT(21) +#define TRBSR_WRAP BIT(20) +#define TRBSR_ABORT BIT(18) +#define TRBSR_STOP BIT(17) +#define TRBSR_MSS_MASK GENMASK(15, 0) +#define TRBSR_MSS_SHIFT 0 
+#define TRBSR_BSC_MASK GENMASK(5, 0) +#define TRBSR_BSC_SHIFT 0 +#define TRBSR_FSC_MASK GENMASK(5, 0) +#define TRBSR_FSC_SHIFT 0 +#define TRBMAR_SHARE_MASK GENMASK(1, 0) +#define TRBMAR_SHARE_SHIFT 8 +#define TRBMAR_OUTER_MASK GENMASK(3, 0) +#define TRBMAR_OUTER_SHIFT 4 +#define TRBMAR_INNER_MASK GENMASK(3, 0) +#define TRBMAR_INNER_SHIFT 0 +#define TRBTRG_TRG_MASK GENMASK(31, 0) +#define TRBTRG_TRG_SHIFT 0 +#define TRBIDR_FLAG BIT(5) +#define TRBIDR_PROG BIT(4) +#define TRBIDR_ALIGN_MASK GENMASK(3, 0) +#define TRBIDR_ALIGN_SHIFT 0 + +#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) +#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) + +#define SYS_PMMIR_EL1 sys_reg(3, 0, 9, 14, 6) + +#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) +#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) + +#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) +#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) +#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) +#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) +#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) + +#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) + +#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) +#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) +#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2) +#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3) +#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n) +#define SYS_ICC_AP0R0_EL1 SYS_ICC_AP0Rn_EL1(0) +#define SYS_ICC_AP0R1_EL1 SYS_ICC_AP0Rn_EL1(1) +#define SYS_ICC_AP0R2_EL1 SYS_ICC_AP0Rn_EL1(2) +#define SYS_ICC_AP0R3_EL1 SYS_ICC_AP0Rn_EL1(3) +#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) +#define SYS_ICC_AP1R0_EL1 SYS_ICC_AP1Rn_EL1(0) +#define SYS_ICC_AP1R1_EL1 SYS_ICC_AP1Rn_EL1(1) +#define SYS_ICC_AP1R2_EL1 SYS_ICC_AP1Rn_EL1(2) +#define SYS_ICC_AP1R3_EL1 SYS_ICC_AP1Rn_EL1(3) +#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) +#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6) +#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7) +#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) +#define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) +#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) +#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) + +#define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) +#define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) + +#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) + +#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) + +#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) +#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) +#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4) +#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) + +#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0) + +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) +#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1) + +#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0) +#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1) +#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2) +#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3) +#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4) +#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5) +#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6) +#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7) +#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 
13, 0) +#define SYS_PMXEVTYPER_EL0 sys_reg(3, 3, 9, 13, 1) +#define SYS_PMXEVCNTR_EL0 sys_reg(3, 3, 9, 13, 2) +#define SYS_PMUSERENR_EL0 sys_reg(3, 3, 9, 14, 0) +#define SYS_PMOVSSET_EL0 sys_reg(3, 3, 9, 14, 3) + +#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) +#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) + +#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) + +/* Definitions for system register interface to AMU for ARMv8.4 onwards */ +#define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) +#define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) +#define SYS_AMCFGR_EL0 SYS_AM_EL0(2, 1) +#define SYS_AMCGCR_EL0 SYS_AM_EL0(2, 2) +#define SYS_AMUSERENR_EL0 SYS_AM_EL0(2, 3) +#define SYS_AMCNTENCLR0_EL0 SYS_AM_EL0(2, 4) +#define SYS_AMCNTENSET0_EL0 SYS_AM_EL0(2, 5) +#define SYS_AMCNTENCLR1_EL0 SYS_AM_EL0(3, 0) +#define SYS_AMCNTENSET1_EL0 SYS_AM_EL0(3, 1) + +/* + * Group 0 of activity monitors (architected): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 010:n<3> n<2:0> + * Type: 11 011 1101 011:n<3> n<2:0> + * n: 0-15 + * + * Group 1 of activity monitors (auxiliary): + * op0 op1 CRn CRm op2 + * Counter: 11 011 1101 110:n<3> n<2:0> + * Type: 11 011 1101 111:n<3> n<2:0> + * n: 0-15 + */ + +#define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) +#define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) + +/* AMU v1: Fixed (architecturally defined) activity monitors */ +#define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0) +#define SYS_AMEVCNTR0_CONST_EL0 SYS_AMEVCNTR0_EL0(1) +#define SYS_AMEVCNTR0_INST_RET_EL0 SYS_AMEVCNTR0_EL0(2) +#define SYS_AMEVCNTR0_MEM_STALL SYS_AMEVCNTR0_EL0(3) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) + +#define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) +#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) +#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) + +#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) +#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) + +#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) +#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) + +#define __PMEV_op2(n) ((n) & 0x7) +#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) +#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) +#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) +#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) + +#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) + +#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) +#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) +#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) +#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) +#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) +#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) +#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) +#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) +#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) +#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) +#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) + +#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) +#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) 
+#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1) +#define SYS_ICH_AP0R2_EL2 __SYS__AP0Rx_EL2(2) +#define SYS_ICH_AP0R3_EL2 __SYS__AP0Rx_EL2(3) + +#define __SYS__AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define SYS_ICH_AP1R0_EL2 __SYS__AP1Rx_EL2(0) +#define SYS_ICH_AP1R1_EL2 __SYS__AP1Rx_EL2(1) +#define SYS_ICH_AP1R2_EL2 __SYS__AP1Rx_EL2(2) +#define SYS_ICH_AP1R3_EL2 __SYS__AP1Rx_EL2(3) + +#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) +#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define SYS_ICH_LR0_EL2 __SYS__LR0_EL2(0) +#define SYS_ICH_LR1_EL2 __SYS__LR0_EL2(1) +#define SYS_ICH_LR2_EL2 __SYS__LR0_EL2(2) +#define SYS_ICH_LR3_EL2 __SYS__LR0_EL2(3) +#define SYS_ICH_LR4_EL2 __SYS__LR0_EL2(4) +#define SYS_ICH_LR5_EL2 __SYS__LR0_EL2(5) +#define SYS_ICH_LR6_EL2 __SYS__LR0_EL2(6) +#define SYS_ICH_LR7_EL2 __SYS__LR0_EL2(7) + +#define __SYS__LR8_EL2(x) sys_reg(3, 4, 12, 13, x) +#define SYS_ICH_LR8_EL2 __SYS__LR8_EL2(0) +#define SYS_ICH_LR9_EL2 __SYS__LR8_EL2(1) +#define SYS_ICH_LR10_EL2 __SYS__LR8_EL2(2) +#define SYS_ICH_LR11_EL2 __SYS__LR8_EL2(3) +#define SYS_ICH_LR12_EL2 __SYS__LR8_EL2(4) +#define SYS_ICH_LR13_EL2 __SYS__LR8_EL2(5) +#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6) +#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) + +/* VHE encodings for architectural EL0/1 system registers */ +#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) +#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) +#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) +#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) +#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) +#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) +#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) +#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) +#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) +#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) +#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) +#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2) +#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0) +#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) +#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) + +/* Common SCTLR_ELx flags. 
*/ +#define SCTLR_ELx_DSSBS (BIT(44)) +#define SCTLR_ELx_ATA (BIT(43)) + +#define SCTLR_ELx_TCF_SHIFT 40 +#define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) + +#define SCTLR_ELx_ENIA_SHIFT 31 + +#define SCTLR_ELx_ITFSB (BIT(37)) +#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT)) +#define SCTLR_ELx_ENIB (BIT(30)) +#define SCTLR_ELx_ENDA (BIT(27)) +#define SCTLR_ELx_EE (BIT(25)) +#define SCTLR_ELx_IESB (BIT(21)) +#define SCTLR_ELx_WXN (BIT(19)) +#define SCTLR_ELx_ENDB (BIT(13)) +#define SCTLR_ELx_I (BIT(12)) +#define SCTLR_ELx_SA (BIT(3)) +#define SCTLR_ELx_C (BIT(2)) +#define SCTLR_ELx_A (BIT(1)) +#define SCTLR_ELx_M (BIT(0)) + +/* SCTLR_EL2 specific flags. */ +#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ + (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ + (BIT(29))) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL2 SCTLR_ELx_EE +#else +#define ENDIAN_SET_EL2 0 +#endif + +#define INIT_SCTLR_EL2_MMU_ON \ + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \ + SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \ + SCTLR_ELx_ITFSB | SCTLR_EL2_RES1) + +#define INIT_SCTLR_EL2_MMU_OFF \ + (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) + +/* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_EPAN (BIT(57)) +#define SCTLR_EL1_ATA0 (BIT(42)) + +#define SCTLR_EL1_TCF0_SHIFT 38 +#define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) + +#define SCTLR_EL1_BT1 (BIT(36)) +#define SCTLR_EL1_BT0 (BIT(35)) +#define SCTLR_EL1_UCI (BIT(26)) +#define SCTLR_EL1_E0E (BIT(24)) +#define SCTLR_EL1_SPAN (BIT(23)) +#define SCTLR_EL1_NTWE (BIT(18)) +#define SCTLR_EL1_NTWI (BIT(16)) +#define SCTLR_EL1_UCT (BIT(15)) +#define SCTLR_EL1_DZE (BIT(14)) +#define SCTLR_EL1_UMA (BIT(9)) +#define SCTLR_EL1_SED (BIT(8)) +#define SCTLR_EL1_ITD (BIT(7)) +#define SCTLR_EL1_CP15BEN (BIT(5)) +#define SCTLR_EL1_SA0 (BIT(4)) + +#define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \ + (BIT(29))) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#else +#define ENDIAN_SET_EL1 0 +#endif + +#define INIT_SCTLR_EL1_MMU_OFF \ + (ENDIAN_SET_EL1 | SCTLR_EL1_RES1) + +#define INIT_SCTLR_EL1_MMU_ON \ + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \ + SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ + SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ + SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) + +/* MAIR_ELx memory attributes (used by Linux) */ +#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) +#define MAIR_ATTR_DEVICE_nGnRE UL(0x04) +#define MAIR_ATTR_NORMAL_NC UL(0x44) +#define MAIR_ATTR_NORMAL_TAGGED UL(0xf0) +#define MAIR_ATTR_NORMAL UL(0xff) +#define MAIR_ATTR_MASK UL(0xff) + +/* Position the attr at the correct index */ +#define MAIR_ATTRIDX(attr, idx) ((attr) << ((idx) * 8)) + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RNDR_SHIFT 60 +#define ID_AA64ISAR0_TLB_SHIFT 56 +#define ID_AA64ISAR0_TS_SHIFT 52 +#define ID_AA64ISAR0_FHM_SHIFT 48 +#define ID_AA64ISAR0_DP_SHIFT 44 +#define ID_AA64ISAR0_SM4_SHIFT 40 +#define ID_AA64ISAR0_SM3_SHIFT 36 +#define ID_AA64ISAR0_SHA3_SHIFT 32 
+#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +#define ID_AA64ISAR0_TLB_RANGE_NI 0x0 +#define ID_AA64ISAR0_TLB_RANGE 0x2 + +/* id_aa64isar1 */ +#define ID_AA64ISAR1_I8MM_SHIFT 52 +#define ID_AA64ISAR1_DGH_SHIFT 48 +#define ID_AA64ISAR1_BF16_SHIFT 44 +#define ID_AA64ISAR1_SPECRES_SHIFT 40 +#define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_FRINTTS_SHIFT 32 +#define ID_AA64ISAR1_GPI_SHIFT 28 +#define ID_AA64ISAR1_GPA_SHIFT 24 +#define ID_AA64ISAR1_LRCPC_SHIFT 20 +#define ID_AA64ISAR1_FCMA_SHIFT 16 +#define ID_AA64ISAR1_JSCVT_SHIFT 12 +#define ID_AA64ISAR1_API_SHIFT 8 +#define ID_AA64ISAR1_APA_SHIFT 4 +#define ID_AA64ISAR1_DPB_SHIFT 0 + +#define ID_AA64ISAR1_APA_NI 0x0 +#define ID_AA64ISAR1_APA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_APA_ARCH_EPAC 0x2 +#define ID_AA64ISAR1_APA_ARCH_EPAC2 0x3 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_API_NI 0x0 +#define ID_AA64ISAR1_API_IMP_DEF 0x1 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC 0x2 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2 0x3 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC 0x4 +#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB 0x5 +#define ID_AA64ISAR1_GPA_NI 0x0 +#define ID_AA64ISAR1_GPA_ARCHITECTED 0x1 +#define ID_AA64ISAR1_GPI_NI 0x0 +#define ID_AA64ISAR1_GPI_IMP_DEF 0x1 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_CSV3_SHIFT 60 +#define ID_AA64PFR0_CSV2_SHIFT 56 +#define ID_AA64PFR0_DIT_SHIFT 48 +#define ID_AA64PFR0_AMU_SHIFT 44 +#define ID_AA64PFR0_MPAM_SHIFT 40 +#define ID_AA64PFR0_SEL2_SHIFT 36 +#define ID_AA64PFR0_SVE_SHIFT 32 +#define ID_AA64PFR0_RAS_SHIFT 28 +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_AMU 0x1 +#define ID_AA64PFR0_SVE 0x1 +#define ID_AA64PFR0_RAS_V1 0x1 +#define ID_AA64PFR0_RAS_V1P1 0x2 +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1 +#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 + +/* id_aa64pfr1 */ +#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 +#define ID_AA64PFR1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_MTE_SHIFT 8 +#define ID_AA64PFR1_SSBS_SHIFT 4 +#define ID_AA64PFR1_BT_SHIFT 0 + +#define ID_AA64PFR1_SSBS_PSTATE_NI 0 +#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 +#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 +#define ID_AA64PFR1_BT_BTI 0x1 + +#define ID_AA64PFR1_MTE_NI 0x0 +#define ID_AA64PFR1_MTE_EL0 0x1 +#define ID_AA64PFR1_MTE 0x2 + +/* id_aa64zfr0 */ +#define ID_AA64ZFR0_F64MM_SHIFT 56 +#define ID_AA64ZFR0_F32MM_SHIFT 52 +#define ID_AA64ZFR0_I8MM_SHIFT 44 +#define ID_AA64ZFR0_SM4_SHIFT 40 +#define ID_AA64ZFR0_SHA3_SHIFT 32 +#define ID_AA64ZFR0_BF16_SHIFT 20 +#define ID_AA64ZFR0_BITPERM_SHIFT 16 +#define ID_AA64ZFR0_AES_SHIFT 4 +#define ID_AA64ZFR0_SVEVER_SHIFT 0 + +#define ID_AA64ZFR0_F64MM 0x1 +#define ID_AA64ZFR0_F32MM 0x1 +#define ID_AA64ZFR0_I8MM 0x1 +#define ID_AA64ZFR0_BF16 0x1 +#define ID_AA64ZFR0_SM4 0x1 +#define ID_AA64ZFR0_SHA3 0x1 +#define ID_AA64ZFR0_BITPERM 0x1 +#define ID_AA64ZFR0_AES 0x1 +#define ID_AA64ZFR0_AES_PMULL 0x2 +#define ID_AA64ZFR0_SVEVER_SVE2 0x1 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_ECV_SHIFT 60 +#define ID_AA64MMFR0_FGT_SHIFT 56 
+#define ID_AA64MMFR0_EXS_SHIFT 44 +#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 +#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 +#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_ASID_8 0x0 +#define ID_AA64MMFR0_ASID_16 0x2 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN 0x0 +#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX 0x7 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN 0x1 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX 0xf + +#define ID_AA64MMFR0_PARANGE_32 0x0 +#define ID_AA64MMFR0_PARANGE_36 0x1 +#define ID_AA64MMFR0_PARANGE_40 0x2 +#define ID_AA64MMFR0_PARANGE_42 0x3 +#define ID_AA64MMFR0_PARANGE_44 0x4 +#define ID_AA64MMFR0_PARANGE_48 0x5 +#define ID_AA64MMFR0_PARANGE_52 0x6 + +#define ARM64_MIN_PARANGE_BITS 32 + +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 +#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX 0x7 + +#ifdef CONFIG_ARM64_PA_BITS_52 +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 +#else +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48 +#endif + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ETS_SHIFT 36 +#define ID_AA64MMFR1_TWED_SHIFT 32 +#define ID_AA64MMFR1_XNX_SHIFT 28 +#define ID_AA64MMFR1_SPECSEI_SHIFT 24 +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +#define ID_AA64MMFR1_VMIDBITS_8 0 +#define ID_AA64MMFR1_VMIDBITS_16 2 + +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_E0PD_SHIFT 60 +#define ID_AA64MMFR2_EVT_SHIFT 56 +#define ID_AA64MMFR2_BBM_SHIFT 52 +#define ID_AA64MMFR2_TTL_SHIFT 48 +#define ID_AA64MMFR2_FWB_SHIFT 40 +#define ID_AA64MMFR2_IDS_SHIFT 36 +#define ID_AA64MMFR2_AT_SHIFT 32 +#define ID_AA64MMFR2_ST_SHIFT 28 +#define ID_AA64MMFR2_NV_SHIFT 24 +#define ID_AA64MMFR2_CCIDX_SHIFT 20 +#define ID_AA64MMFR2_LVA_SHIFT 16 +#define ID_AA64MMFR2_IESB_SHIFT 12 +#define ID_AA64MMFR2_LSM_SHIFT 8 +#define ID_AA64MMFR2_UAO_SHIFT 4 +#define ID_AA64MMFR2_CNP_SHIFT 0 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_MTPMU_SHIFT 48 +#define ID_AA64DFR0_TRBE_SHIFT 44 +#define ID_AA64DFR0_TRACE_FILT_SHIFT 40 +#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 +#define ID_AA64DFR0_PMSVER_SHIFT 32 +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_AA64DFR0_PMUVER_8_0 0x1 +#define ID_AA64DFR0_PMUVER_8_1 0x4 +#define ID_AA64DFR0_PMUVER_8_4 0x5 +#define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_IMP_DEF 0xf + +#define ID_AA64DFR0_PMSVER_8_2 0x1 +#define ID_AA64DFR0_PMSVER_8_3 0x2 + +#define ID_DFR0_PERFMON_SHIFT 24 + +#define ID_DFR0_PERFMON_8_0 0x3 +#define ID_DFR0_PERFMON_8_1 0x4 +#define ID_DFR0_PERFMON_8_4 0x5 +#define ID_DFR0_PERFMON_8_5 0x6 + +#define ID_ISAR4_SWP_FRAC_SHIFT 28 +#define ID_ISAR4_PSR_M_SHIFT 24 +#define 
ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 +#define ID_ISAR4_BARRIER_SHIFT 16 +#define ID_ISAR4_SMC_SHIFT 12 +#define ID_ISAR4_WRITEBACK_SHIFT 8 +#define ID_ISAR4_WITHSHIFTS_SHIFT 4 +#define ID_ISAR4_UNPRIV_SHIFT 0 + +#define ID_DFR1_MTPMU_SHIFT 0 + +#define ID_ISAR0_DIVIDE_SHIFT 24 +#define ID_ISAR0_DEBUG_SHIFT 20 +#define ID_ISAR0_COPROC_SHIFT 16 +#define ID_ISAR0_CMPBRANCH_SHIFT 12 +#define ID_ISAR0_BITFIELD_SHIFT 8 +#define ID_ISAR0_BITCOUNT_SHIFT 4 +#define ID_ISAR0_SWAP_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define ID_ISAR6_I8MM_SHIFT 24 +#define ID_ISAR6_BF16_SHIFT 20 +#define ID_ISAR6_SPECRES_SHIFT 16 +#define ID_ISAR6_SB_SHIFT 12 +#define ID_ISAR6_FHM_SHIFT 8 +#define ID_ISAR6_DP_SHIFT 4 +#define ID_ISAR6_JSCVT_SHIFT 0 + +#define ID_MMFR0_INNERSHR_SHIFT 28 +#define ID_MMFR0_FCSE_SHIFT 24 +#define ID_MMFR0_AUXREG_SHIFT 20 +#define ID_MMFR0_TCM_SHIFT 16 +#define ID_MMFR0_SHARELVL_SHIFT 12 +#define ID_MMFR0_OUTERSHR_SHIFT 8 +#define ID_MMFR0_PMSA_SHIFT 4 +#define ID_MMFR0_VMSA_SHIFT 0 + +#define ID_MMFR4_EVT_SHIFT 28 +#define ID_MMFR4_CCIDX_SHIFT 24 +#define ID_MMFR4_LSM_SHIFT 20 +#define ID_MMFR4_HPDS_SHIFT 16 +#define ID_MMFR4_CNP_SHIFT 12 +#define ID_MMFR4_XNX_SHIFT 8 +#define ID_MMFR4_AC2_SHIFT 4 +#define ID_MMFR4_SPECSEI_SHIFT 0 + +#define ID_MMFR5_ETS_SHIFT 0 + +#define ID_PFR0_DIT_SHIFT 24 +#define ID_PFR0_CSV2_SHIFT 16 +#define ID_PFR0_STATE3_SHIFT 12 +#define ID_PFR0_STATE2_SHIFT 8 +#define ID_PFR0_STATE1_SHIFT 4 +#define ID_PFR0_STATE0_SHIFT 0 + +#define ID_DFR0_PERFMON_SHIFT 24 +#define ID_DFR0_MPROFDBG_SHIFT 20 +#define ID_DFR0_MMAPTRC_SHIFT 16 +#define ID_DFR0_COPTRC_SHIFT 12 +#define ID_DFR0_MMAPDBG_SHIFT 8 +#define ID_DFR0_COPSDBG_SHIFT 4 +#define ID_DFR0_COPDBG_SHIFT 0 + +#define ID_PFR2_SSBS_SHIFT 4 +#define ID_PFR2_CSV3_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + +#define ID_PFR1_GIC_SHIFT 28 +#define ID_PFR1_VIRT_FRAC_SHIFT 24 +#define ID_PFR1_SEC_FRAC_SHIFT 20 +#define ID_PFR1_GENTIMER_SHIFT 16 +#define ID_PFR1_VIRTUALIZATION_SHIFT 12 +#define ID_PFR1_MPROGMOD_SHIFT 8 +#define ID_PFR1_SECURITY_SHIFT 4 +#define ID_PFR1_PROGMOD_SHIFT 0 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN +#define 
ID_AA64MMFR0_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT +#endif + +#define MVFR2_FPMISC_SHIFT 4 +#define MVFR2_SIMDMISC_SHIFT 0 + +#define DCZID_DZP_SHIFT 4 +#define DCZID_BS_SHIFT 0 + +/* + * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which + * are reserved by the SVE architecture for future expansion of the LEN + * field, with compatible semantics. + */ +#define ZCR_ELx_LEN_SHIFT 0 +#define ZCR_ELx_LEN_SIZE 9 +#define ZCR_ELx_LEN_MASK 0x1ff + +#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ +#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) + +/* TCR EL1 Bit Definitions */ +#define SYS_TCR_EL1_TCMA1 (BIT(58)) +#define SYS_TCR_EL1_TCMA0 (BIT(57)) + +/* GCR_EL1 Definitions */ +#define SYS_GCR_EL1_RRND (BIT(16)) +#define SYS_GCR_EL1_EXCL_MASK 0xffffUL + +/* RGSR_EL1 Definitions */ +#define SYS_RGSR_EL1_TAG_MASK 0xfUL +#define SYS_RGSR_EL1_SEED_SHIFT 8 +#define SYS_RGSR_EL1_SEED_MASK 0xffffUL + +/* GMID_EL1 field definitions */ +#define SYS_GMID_EL1_BS_SHIFT 0 +#define SYS_GMID_EL1_BS_SIZE 4 + +/* TFSR{,E0}_EL1 bit definitions */ +#define SYS_TFSR_EL1_TF0_SHIFT 0 +#define SYS_TFSR_EL1_TF1_SHIFT 1 +#define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) + +/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ +#define SYS_MPIDR_SAFE_VAL (BIT(31)) + +#define TRFCR_ELx_TS_SHIFT 5 +#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT) +#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT) +#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT) +#define TRFCR_EL2_CX BIT(3) +#define TRFCR_ELx_ExTRE BIT(1) +#define TRFCR_ELx_E0TRE BIT(0) + + +/* GIC Hypervisor interface registers */ +/* ICH_MISR_EL2 bit definitions */ +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +/* ICH_LR*_EL2 bit definitions */ +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) + +/* ICH_HCR_EL2 bit definitions */ +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) +#define ICH_HCR_TC (1 << 10) +#define ICH_HCR_TALL0 (1 << 11) +#define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) + +/* ICH_VMCR_EL2 bit definitions */ +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << 
ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +/* ICH_VTR_EL2 bit definitions */ +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) + +#define ARM64_FEATURE_FIELD_BITS 4 + +/* Create a mask for the feature bits of the specified feature. */ +#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT)) + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__reg_num_x\num, \num + .endr + .equ .L__reg_num_xzr, 31 + + .macro mrs_s, rt, sreg + __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + .endm + + .macro msr_s, sreg, rt + __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + .endm + +#else + +#include +#include +#include + +#define __DEFINE_MRS_MSR_S_REGNUM \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__reg_num_x\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__reg_num_xzr, 31\n" + +#define DEFINE_MRS_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro mrs_s, rt, sreg\n" \ + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ +" .endm\n" + +#define DEFINE_MSR_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro msr_s, sreg, rt\n" \ + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ +" .endm\n" + +#define UNDEFINE_MRS_S \ +" .purgem mrs_s\n" + +#define UNDEFINE_MSR_S \ +" .purgem msr_s\n" + +#define __mrs_s(v, r) \ + DEFINE_MRS_S \ +" mrs_s " v ", " __stringify(r) "\n" \ + UNDEFINE_MRS_S + +#define __msr_s(r, v) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", " v "\n" \ + UNDEFINE_MSR_S + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +/* + * The "Z" constraint normally means a zero immediate, but when combined with + * the "%x0" template means XZR. + */ +#define write_sysreg(v, r) do { \ + u64 __val = (u64)(v); \ + asm volatile("msr " __stringify(r) ", %x0" \ + : : "rZ" (__val)); \ +} while (0) + +/* + * For registers without architectural names, or simply unsupported by + * GAS. + */ +#define read_sysreg_s(r) ({ \ + u64 __val; \ + asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg_s(v, r) do { \ + u64 __val = (u64)(v); \ + asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ +} while (0) + +/* + * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the + * set mask are set. Other bits are left as-is. 
+ */ +#define sysreg_clear_set(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg(__scs_new, sysreg); \ +} while (0) + +#define sysreg_clear_set_s(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg_s(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_s(__scs_new, sysreg); \ +} while (0) + +#define read_sysreg_par() ({ \ + u64 par; \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par = read_sysreg(par_el1); \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par; \ +}) + +#endif + +#endif /* __ASM_SYSREG_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index b7fa0c8551db..68d4e4c74ecf 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -8,6 +8,7 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ -- Gitee From 36569bdea1beb7f5a000e7a6efc32e3971d63193 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 24 Mar 2022 18:33:20 +0000 Subject: [PATCH 145/173] tools arm64: Import cputype.h [Upstream commit 1314376d495f2d79cc58753ff3034ccc503c43c9] Bring-in the kernel's arch/arm64/include/asm/cputype.h into tools/ for arm64 to make use of all the core-type definitions in perf. Replace sysreg.h with the version already imported into tools/. Committer notes: Added an entry to tools/perf/check-headers.sh, so that we get notified when the original file in the kernel sources gets modified. Tester notes: LGTM. I did the testing on both my x86 and Arm64 platforms, thanks for the fixing up. Signed-off-by: Ali Saidi Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Benjamin Herrenschmidt Cc: German Gomez Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Li Huafei Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick.Forrington@arm.com Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220324183323.31414-2-alisaidi@amazon.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/arch/arm64/include/asm/cputype.h | 258 +++++++++++++++++++++++++ tools/arch/arm64/include/asm/sysreg.h | 2 +- tools/perf/check-headers.sh | 1 + 3 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 tools/arch/arm64/include/asm/cputype.h diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h new file mode 100644 index 000000000000..7e738fc6e3db --- /dev/null +++ b/tools/arch/arm64/include/asm/cputype.h @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Ltd. 
+ */ +#ifndef __ASM_CPUTYPE_H +#define __ASM_CPUTYPE_H + +#define INVALID_HWID ULONG_MAX + +#define MPIDR_UP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24) +#define MPIDR_HWID_BITMASK UL(0xff00ffffff) + +#define MPIDR_LEVEL_BITS_SHIFT 3 +#define MPIDR_LEVEL_BITS (1 << MPIDR_LEVEL_BITS_SHIFT) +#define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) + +#define MPIDR_LEVEL_SHIFT(level) \ + (((1 << level) >> 1) << MPIDR_LEVEL_BITS_SHIFT) + +#define MPIDR_AFFINITY_LEVEL(mpidr, level) \ + ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) + +#define MIDR_REVISION_MASK 0xf +#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) +#define MIDR_PARTNUM_SHIFT 4 +#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +#define MIDR_PARTNUM(midr) \ + (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) +#define MIDR_ARCHITECTURE_SHIFT 16 +#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT) +#define MIDR_ARCHITECTURE(midr) \ + (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT) +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_VARIANT(midr) \ + (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) +#define MIDR_IMPLEMENTOR_SHIFT 24 +#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR(midr) \ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + +#define MIDR_CPU_MODEL(imp, partnum) \ + (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + (0xf << MIDR_ARCHITECTURE_SHIFT) | \ + ((partnum) << MIDR_PARTNUM_SHIFT)) + +#define MIDR_CPU_VAR_REV(var, rev) \ + (((var) << MIDR_VARIANT_SHIFT) | (rev)) + +#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +#define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 +#define ARM_CPU_IMP_CAVIUM 0x43 +#define ARM_CPU_IMP_BRCM 0x42 +#define ARM_CPU_IMP_QCOM 0x51 +#define ARM_CPU_IMP_NVIDIA 0x4E +#define ARM_CPU_IMP_FUJITSU 0x46 +#define ARM_CPU_IMP_HISI 0x48 +#define ARM_CPU_IMP_APPLE 0x61 + +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A72 0xD08 +#define ARM_CPU_PART_CORTEX_A53 0xD03 +#define ARM_CPU_PART_CORTEX_A73 0xD09 +#define ARM_CPU_PART_CORTEX_A75 0xD0A +#define ARM_CPU_PART_CORTEX_A35 0xD04 +#define ARM_CPU_PART_CORTEX_A55 0xD05 +#define ARM_CPU_PART_CORTEX_A76 0xD0B +#define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 +#define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B + +#define APM_CPU_PART_POTENZA 0x000 + +#define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 +#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 +#define CAVIUM_CPU_PART_THUNDERX2 0x0AF +/* OcteonTx2 series */ +#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1 +#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2 +#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3 +#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4 +#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5 +#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6 + +#define BRCM_CPU_PART_BRAHMA_B53 0x100 +#define BRCM_CPU_PART_VULCAN 0x516 + +#define QCOM_CPU_PART_FALKOR_V1 0x800 +#define QCOM_CPU_PART_FALKOR 0xC00 +#define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define 
QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 +#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 +#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 + +#define NVIDIA_CPU_PART_DENVER 0x003 +#define NVIDIA_CPU_PART_CARMEL 0x004 + +#define FUJITSU_CPU_PART_A64FX 0x001 + +#define HISI_CPU_PART_TSV110 0xD01 + +#define APPLE_CPU_PART_M1_ICESTORM 0x022 +#define APPLE_CPU_PART_M1_FIRESTORM 0x023 + +#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) +#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) +#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) +#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) +#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) +#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) +#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) +#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX) +#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX) +#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX) +#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN) +#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM) +#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO) +#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) +#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) +#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) +#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) +#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) +#define MIDR_QCOM_KRYO_4XX_GOLD 
MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) +#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) +#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) +#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) +#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) +#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) + +/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ +#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX +#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) +#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) + +#ifndef __ASSEMBLY__ + +#include "asm/sysreg.h" + +#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) + +/* + * Represent a range of MIDR values for a given CPU model and a + * range of variant/revision values. + * + * @model - CPU model as defined by MIDR_CPU_MODEL + * @rv_min - Minimum value for the revision/variant as defined by + * MIDR_CPU_VAR_REV + * @rv_max - Maximum value for the variant/revision for the range. + */ +struct midr_range { + u32 model; + u32 rv_min; + u32 rv_max; +}; + +#define MIDR_RANGE(m, v_min, r_min, v_max, r_max) \ + { \ + .model = m, \ + .rv_min = MIDR_CPU_VAR_REV(v_min, r_min), \ + .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \ + } + +#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max) +#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r) +#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) + +static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min, + u32 rv_max) +{ + u32 _model = midr & MIDR_CPU_MODEL_MASK; + u32 rv = midr & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); + + return _model == model && rv >= rv_min && rv <= rv_max; +} + +static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) +{ + return midr_is_cpu_model_range(midr, range->model, + range->rv_min, range->rv_max); +} + +static inline bool +is_midr_in_range_list(u32 midr, struct midr_range const *ranges) +{ + while (ranges->model) + if (is_midr_in_range(midr, ranges++)) + return true; + return false; +} + +/* + * The CPU ID never changes at run time, so we might as well tell the + * compiler that it's constant. Use this function to read the CPU ID + * rather than directly reading processor_id or read_cpuid() directly. 
+ */ +static inline u32 __attribute_const__ read_cpuid_id(void) +{ + return read_cpuid(MIDR_EL1); +} + +static inline u64 __attribute_const__ read_cpuid_mpidr(void) +{ + return read_cpuid(MPIDR_EL1); +} + +static inline unsigned int __attribute_const__ read_cpuid_implementor(void) +{ + return MIDR_IMPLEMENTOR(read_cpuid_id()); +} + +static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +{ + return MIDR_PARTNUM(read_cpuid_id()); +} + +static inline u32 __attribute_const__ read_cpuid_cachetype(void) +{ + return read_cpuid(CTR_EL0); +} +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 7640fa27be94..41b811514374 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -1194,7 +1194,7 @@ #include #include -#include +#include #define __DEFINE_MRS_MSR_S_REGNUM \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 0472da3741ed..795e5702e77e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -114,6 +114,7 @@ done check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' -- Gitee From 99ca1bd7cdc5a06dfbca457225027cece5848062 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 11 Aug 2022 14:24:39 +0800 Subject: [PATCH 146/173] perf arm-spe: Use SPE data source for neoverse cores [Upstream commit 4e6430cbb1a9f1dc0a698f93026b6178da437798] When synthesizing data from SPE, augment the type with source information for Arm Neoverse cores. The field is IMPLDEF but the Neoverse cores all use the same encoding. I can't find encoding information for any other SPE implementations to unify their choices with Arm's thus that is left for future work. This change populates the mem_lvl_num for Neoverse cores as well as the deprecated mem_lvl namespace. Reviewed-by: German Gomez Reviewed-by: Leo Yan Signed-off-by: Ali Saidi Tested-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Gustavo A. R. 
Silva Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Like Xu Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Timothy Hayes Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220811062451.435810-4-leo.yan@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../util/arm-spe-decoder/arm-spe-decoder.c | 1 + .../util/arm-spe-decoder/arm-spe-decoder.h | 12 ++ tools/perf/util/arm-spe.c | 126 ++++++++++++++++-- 3 files changed, 125 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3fc528c9270c..3e3693447715 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -218,6 +218,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) break; case ARM_SPE_DATA_SOURCE: + decoder->record.source = payload; break; case ARM_SPE_BAD: break; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 46a8556a9e95..c3943eb95e30 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -29,6 +29,17 @@ enum arm_spe_op_type { ARM_SPE_ST = 1 << 1, }; +enum arm_spe_neoverse_data_source { + ARM_SPE_NV_L1D = 0x0, + ARM_SPE_NV_L2 = 0x8, + ARM_SPE_NV_PEER_CORE = 0x9, + ARM_SPE_NV_LOCAL_CLUSTER = 0xa, + ARM_SPE_NV_SYS_CACHE = 0xb, + ARM_SPE_NV_PEER_CLUSTER = 0xc, + ARM_SPE_NV_REMOTE = 0xd, + ARM_SPE_NV_DRAM = 0xe, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; @@ -39,6 +50,7 @@ struct arm_spe_record { u64 virt_addr; u64 phys_addr; u64 context_id; + u16 source; }; struct arm_spe_insn; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index fccac06b573a..f93a8f18c8f9 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -34,6 +34,7 @@ #include "arm-spe-decoder/arm-spe-decoder.h" #include "arm-spe-decoder/arm-spe-pkt-decoder.h" +#include "../../arch/arm64/include/asm/cputype.h" #define MAX_TIMESTAMP (~0ULL) struct arm_spe { @@ -45,6 +46,7 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; + u64 midr; struct perf_tsc_conversion tc; @@ -363,33 +365,126 @@ static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) return false; } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) +static const struct midr_range neoverse_spe[] = { + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, +}; + +static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { - union perf_mem_data_src data_src = { 0 }; + /* + * Even though four levels of cache hierarchy are possible, no known + * production Neoverse systems currently include more than three levels + * so for the time being we assume three exist. If a production system + * is built with four the this function would have to be changed to + * detect the number of levels for reporting. + */ - if (record->op == ARM_SPE_LD) - data_src.mem_op = PERF_MEM_OP_LOAD; - else - data_src.mem_op = PERF_MEM_OP_STORE; + /* + * We have no data on the hit level or data source for stores in the + * Neoverse SPE records. 
+ */ + if (record->op & ARM_SPE_ST) { + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + return; + } + + switch (record->source) { + case ARM_SPE_NV_L1D: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_NV_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_NV_PEER_CORE: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + /* + * We don't know if this is L1, L2 but we do know it was a cache-2-cache + * transfer, so set SNOOPX_PEER + */ + case ARM_SPE_NV_LOCAL_CLUSTER: + case ARM_SPE_NV_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + /* + * System cache is assumed to be L3 + */ + case ARM_SPE_NV_SYS_CACHE: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + /* + * We don't know what level it hit in, except it came from the other + * socket + */ + case ARM_SPE_NV_REMOTE: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_NV_DRAM: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} +static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { - data_src.mem_lvl = PERF_MEM_LVL_L3; + data_src->mem_lvl = PERF_MEM_LVL_L3; if (record->type & ARM_SPE_LLC_MISS) - data_src.mem_lvl |= PERF_MEM_LVL_MISS; + data_src->mem_lvl |= PERF_MEM_LVL_MISS; else - data_src.mem_lvl |= PERF_MEM_LVL_HIT; + data_src->mem_lvl |= PERF_MEM_LVL_HIT; } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { - data_src.mem_lvl = PERF_MEM_LVL_L1; + data_src->mem_lvl = PERF_MEM_LVL_L1; if (record->type & ARM_SPE_L1D_MISS) - data_src.mem_lvl |= PERF_MEM_LVL_MISS; + data_src->mem_lvl |= PERF_MEM_LVL_MISS; else - data_src.mem_lvl |= PERF_MEM_LVL_HIT; + data_src->mem_lvl |= PERF_MEM_LVL_HIT; } if (record->type & ARM_SPE_REMOTE_ACCESS) - data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1; + data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; +} + +static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +{ + union perf_mem_data_src data_src = { 0 }; + bool is_neoverse = is_midr_in_range(midr, neoverse_spe); + + if (record->op == ARM_SPE_LD) + data_src.mem_op = PERF_MEM_OP_LOAD; + else + data_src.mem_op = PERF_MEM_OP_STORE; + + if (is_neoverse) + arm_spe__synth_data_source_neoverse(record, &data_src); + else + arm_spe__synth_data_source_generic(record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -410,7 +505,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) u64 data_src; int err; - data_src = arm_spe__synth_data_source(record); + data_src = 
arm_spe__synth_data_source(record, spe->midr); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -1118,6 +1213,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; struct perf_record_time_conv *tc = &session->time_conv; + const char *cpuid = perf_env__cpuid(session->evlist->env); + u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; int err; @@ -1137,6 +1234,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + spe->midr = midr; spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); -- Gitee From 5d3f3a33ab970d8914e4c584d7afe8808e85f2ab Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 26 Sep 2022 21:03:16 +0800 Subject: [PATCH 147/173] perf arm-spe: augment the data source type with neoverse_spe list [Upstream commit 74a61d53a6d1ca1172d85964d15c83c2cc3670b3] When synthesizing events with SPE data source, commit 4e6430cbb1a9 ("perf arm-spe: Use SPE data source for neoverse cores") augmented the type with source information by MIDR. However, is_midr_in_range only compares the first entry in neoverse_spe. Change is_midr_in_range to is_midr_in_range_list to traverse the whole neoverse_spe array so that all Neoverse cores synthesize events with the data source packet. Fixes: 4e6430cbb1a9f1dc ("perf arm-spe: Use SPE data source for neoverse cores") Reviewed-by: Ali Saidi Reviewed-by: Leo Yan Signed-off-by: Jing Zhang Cc: Alexander Shishkin Cc: Ali Saidi Cc: German Gomez Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Timothy Hayes Cc: Will Deacon Cc: Zhuo Song Link: https://lore.kernel.org/r/1664197396-42672-1-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/arm-spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index f93a8f18c8f9..a1d3fa93a679 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -474,7 +474,7 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { union perf_mem_data_src data_src = { 0 }; - bool is_neoverse = is_midr_in_range(midr, neoverse_spe); + bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); if (record->op == ARM_SPE_LD) data_src.mem_op = PERF_MEM_OP_LOAD; -- Gitee From 124665d1f1c2dc2e8a8fa418d0aecf5945e8c099 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Mon, 20 Mar 2023 15:15:06 +0000 Subject: [PATCH 148/173] perf arm-spe: Refactor arm-spe to support operation packet type [Upstream commit 0066015a3d8f9c01a17eb04579edba7dac9510af] Extend the decoder of Arm SPE records to support more fields from the operation packet type. Not all fields are being decoded by this commit; only those needed to support the SVE load/store/other operations use case.
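For illustration, here is a minimal sketch (not part of this patch) of how a consumer of the decoder might classify a record once the first and second level operation flags introduced below are populated; the helper name is hypothetical:

	/* Hypothetical helper: map decoded op flags to a label. */
	static const char *arm_spe__op_class(const struct arm_spe_record *record)
	{
		if (record->op & ARM_SPE_OP_LDST) {
			if (record->op & ARM_SPE_OP_SVE_LDST)
				return "sve-ldst";
			return (record->op & ARM_SPE_OP_ST) ? "store" : "load";
		}
		if (record->op & ARM_SPE_OP_BRANCH_ERET)
			return "branch/eret";
		return "other";
	}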
Suggested-by: Leo Yan Signed-off-by: German Gomez Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Anshuman.Khandual@arm.com Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230320151509.1137462-2-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- .../util/arm-spe-decoder/arm-spe-decoder.c | 30 ++++++++++-- .../util/arm-spe-decoder/arm-spe-decoder.h | 47 +++++++++++++++---- tools/perf/util/arm-spe.c | 10 ++-- 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3e3693447715..3b937e89654f 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -184,11 +184,27 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.context_id = payload; break; case ARM_SPE_OP_TYPE: - if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) { - if (payload & 0x1) - decoder->record.op = ARM_SPE_ST; + switch (idx) { + case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC: + decoder->record.op |= ARM_SPE_OP_LDST; + if (payload & SPE_OP_PKT_ST) + decoder->record.op |= ARM_SPE_OP_ST; else - decoder->record.op = ARM_SPE_LD; + decoder->record.op |= ARM_SPE_OP_LD; + if (SPE_OP_PKT_IS_LDST_SVE(payload)) + decoder->record.op |= ARM_SPE_OP_SVE_LDST; + break; + case SPE_OP_PKT_HDR_CLASS_OTHER: + decoder->record.op |= ARM_SPE_OP_OTHER; + if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) + decoder->record.op |= ARM_SPE_OP_SVE_OTHER; + break; + case SPE_OP_PKT_HDR_CLASS_BR_ERET: + decoder->record.op |= ARM_SPE_OP_BRANCH_ERET; + break; + default: + pr_err("Get packet error!\n"); + return -1; } break; case ARM_SPE_EVENTS: @@ -216,6 +232,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_MISPRED)) decoder->record.type |= ARM_SPE_BRANCH_MISS; + if (payload & BIT(EV_PARTIAL_PREDICATE)) + decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED; + + if (payload & BIT(EV_EMPTY_PREDICATE)) + decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED; + break; case ARM_SPE_DATA_SOURCE: decoder->record.source = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index c3943eb95e30..fa269c9c53b3 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -14,19 +14,46 @@ #include "arm-spe-pkt-decoder.h" enum arm_spe_sample_type { - ARM_SPE_L1D_ACCESS = 1 << 0, - ARM_SPE_L1D_MISS = 1 << 1, - ARM_SPE_LLC_ACCESS = 1 << 2, - ARM_SPE_LLC_MISS = 1 << 3, - ARM_SPE_TLB_ACCESS = 1 << 4, - ARM_SPE_TLB_MISS = 1 << 5, - ARM_SPE_BRANCH_MISS = 1 << 6, - ARM_SPE_REMOTE_ACCESS = 1 << 7, + ARM_SPE_L1D_ACCESS = 1 << 0, + ARM_SPE_L1D_MISS = 1 << 1, + ARM_SPE_LLC_ACCESS = 1 << 2, + ARM_SPE_LLC_MISS = 1 << 3, + ARM_SPE_TLB_ACCESS = 1 << 4, + ARM_SPE_TLB_MISS = 1 << 5, + ARM_SPE_BRANCH_MISS = 1 << 6, + ARM_SPE_REMOTE_ACCESS = 1 << 7, + ARM_SPE_SVE_PARTIAL_PRED = 1 << 8, + ARM_SPE_SVE_EMPTY_PRED = 1 << 9, }; enum arm_spe_op_type { - ARM_SPE_LD = 1 << 0, - ARM_SPE_ST = 1 << 1, + /* First level operation type */ + ARM_SPE_OP_OTHER = 1 << 0, + ARM_SPE_OP_LDST = 1 << 1, + ARM_SPE_OP_BRANCH_ERET = 1 << 2, + + /* Second level operation type for OTHER */ + ARM_SPE_OP_SVE_OTHER = 1 << 16, + ARM_SPE_OP_SVE_FP 
= 1 << 17, + ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18, + + /* Second level operation type for LDST */ + ARM_SPE_OP_LD = 1 << 16, + ARM_SPE_OP_ST = 1 << 17, + ARM_SPE_OP_ATOMIC = 1 << 18, + ARM_SPE_OP_EXCL = 1 << 19, + ARM_SPE_OP_AR = 1 << 20, + ARM_SPE_OP_SIMD_FP = 1 << 21, + ARM_SPE_OP_GP_REG = 1 << 22, + ARM_SPE_OP_UNSPEC_REG = 1 << 23, + ARM_SPE_OP_NV_SYSREG = 1 << 24, + ARM_SPE_OP_SVE_LDST = 1 << 25, + ARM_SPE_OP_SVE_PRED_LDST = 1 << 26, + ARM_SPE_OP_SVE_SG = 1 << 27, + + /* Second level operation type for BRANCH_ERET */ + ARM_SPE_OP_BR_COND = 1 << 16, + ARM_SPE_OP_BR_INDIRECT = 1 << 17, }; enum arm_spe_neoverse_data_source { diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index a1d3fa93a679..4073d656dfeb 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -387,7 +387,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We have no data on the hit level or data source for stores in the * Neoverse SPE records. */ - if (record->op & ARM_SPE_ST) { + if (record->op & ARM_SPE_OP_ST) { data_src->mem_lvl = PERF_MEM_LVL_NA; data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; data_src->mem_snoop = PERF_MEM_SNOOP_NA; @@ -473,13 +473,15 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { - union perf_mem_data_src data_src = { 0 }; + union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); - if (record->op == ARM_SPE_LD) + if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; - else + else if (record->op & ARM_SPE_OP_ST) data_src.mem_op = PERF_MEM_OP_STORE; + else + return 0; if (is_neoverse) arm_spe__synth_data_source_neoverse(record, &data_src); -- Gitee From fac29e30f90cc89dcfa726947984c6f73ea5350e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 11 Aug 2022 19:06:38 -0300 Subject: [PATCH 149/173] perf arm64: Add missing -I for tools/arch/arm64/include/ to find asm/sysreg.h when building arm_spe.h [Upstream commit 4a88c4ec3c2c6743cc4424b51e66a0c034afe507] This cures a current problem where tools/perf/util/arm-spe.c isn't finding an ARM64-specific asm header, so let's add it for now to make progress. Adding a .o-specific rule seems clunky; let's try to find out whether this is really the right solution.
Signed-off-by: Leo Yan Reported-by: Suzuki K Poulose Tested-by: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Anshuman Khandual Cc: Will Deacon Cc: James Morse Link: https://lore.kernel.org/lkml/20220811124825.GA868014@leoy-huanghe.lan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/Build | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index df4219f7615a..6122b79a498c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -224,6 +224,7 @@ CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_parse-events.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE +CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE $(call rule_mkdir) -- Gitee From 219486d23187ebdcad57cb88e28e682061e4ab9d Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 4 Jul 2024 16:56:46 +0800 Subject: [PATCH 150/173] arm64: arm_pmuv3: Correctly extract and check the PMUVer [Upstream commit b782e8d07baac95a5ce3f8773cc61f4ed7d0ccbc] Currently we use "sbfx" to extract the PMUVer from ID_AA64DFR0_EL1 and skip the init/reset, assuming no PMU is present, when the extracted PMUVer is negative or zero. However, for PMUv3p8 the PMUVer will be 0b1000, so the PMUVer extracted by "sbfx" will always be negative and we'll skip the init/reset in __init_el2_debug/reset_pmuserenr_el0 unexpectedly. So this patch uses "ubfx" instead of "sbfx" to extract the PMUVer. If the PMUVer is implementation defined (0b1111) or not implemented (0b0000) then skip the reset/init. Previously we'd also skip the init/reset if the PMUVer was higher than the version we know of (currently PMUv3p9); with this patch we'll only skip if the PMU is not implemented or implementation defined. This is consistent with how we probe the PMU in the driver with pmuv3_implemented().
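To see the difference concretely, this stand-alone user-space sketch (illustrative only, not kernel code) mimics a signed "sbfx" versus an unsigned "ubfx" extract of the 4-bit PMUVer field at bits [11:8] of ID_AA64DFR0_EL1, for PMUv3p8 (0b1000):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long dfr0 = 0x8ULL << 8;		/* PMUVer = 0b1000 */

		/* sbfx: sign-extends the top bit of the field, so 0b1000 reads as -8 */
		long long sbfx = (long long)(dfr0 << 52) >> 60;
		/* ubfx: zero-extends, so 0b1000 reads as 8 */
		unsigned long long ubfx = (dfr0 >> 8) & 0xf;

		printf("sbfx=%lld ubfx=%llu\n", sbfx, ubfx);	/* sbfx=-8 ubfx=8 */
		return 0;
	}

With the signed extract, the old "cmp #1; b.lt" sequence treats PMUv3p8 as "no PMU present", which is exactly the bug fixed below.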
Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240411123030.7201-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/include/asm/assembler.h | 7 ++++--- arch/arm64/include/asm/sysreg.h | 2 +- arch/arm64/kernel/head.S | 9 +++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 9c0c22540b1c..b08e442c6a94 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -501,9 +501,10 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_PMUVER_NI + ccmp \tmpreg, #ID_AA64DFR0_PMUVER_IMP_DEF, #4, ne + b.eq 9000f msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7683351db8a9..7f19a4786d5e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1250,7 +1250,7 @@ #define ID_AA64DFR0_EL1_BRBE_NI 0x0 #define ID_AA64DFR0_EL1_BRBE_IMP 0x1 #define ID_AA64DFR0_EL1_BRBE_BRBE_V1P1 0x2 - +#define ID_AA64DFR0_PMUVER_NI 0x0 #define ID_AA64DFR0_PMUVER_8_0 0x1 #define ID_AA64DFR0_PMUVER_8_1 0x4 #define ID_AA64DFR0_PMUVER_8_4 0x5 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index dfbbd20afd01..90d31516901e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -621,13 +621,14 @@ set_hcr: /* EL2 debug */ mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt 4f // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 + cmp x0, #ID_AA64DFR0_PMUVER_NI + ccmp x0, #ID_AA64DFR0_PMUVER_IMP_DEF, #4, ne + b.eq 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to 4: - csel x3, xzr, x0, lt // all PMU counters from EL1 + csel x3, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 -- Gitee From 928c126b21e6f6a7542c11c2e1e48338829b2d04 Mon Sep 17 00:00:00 2001 From: Yushan Wang Date: Mon, 30 Sep 2024 16:43:09 +0800 Subject: [PATCH 151/173] perf cpumap: Wrapper for CPU map indices Linux mainline changed the type of perf CPU map entries to struct perf_cpu to avoid confusing CPU map indices with CPU numbers. The change affects a lot of the perf tool infrastructure that this patchset depends on to fix the issue of perf undesirably opening events on offline CPUs [1]. This patch, whose purpose is to adapt the following patches from Linux mainline to openEuler, is a copy of patch [2], which changes the type used by the perf CPU map with careful refactoring of the affected code.
[1] https://lore.kernel.org/lkml/20240603092812.46616-1-yangyicong@huawei.com/ [2] https://lore.kernel.org/r/20220105061351.120843-49-irogers@google.com Signed-off-by: Yushan Wang Signed-off-by: huwentao --- tools/perf/bench/epoll-ctl.c | 2 +- tools/perf/bench/epoll-wait.c | 2 +- tools/perf/bench/futex-hash.c | 2 +- tools/perf/bench/futex-lock-pi.c | 2 +- tools/perf/bench/futex-requeue.c | 2 +- tools/perf/bench/futex-wake-parallel.c | 2 +- tools/perf/bench/futex-wake.c | 2 +- tools/perf/builtin-c2c.c | 6 ++-- tools/perf/builtin-script.c | 2 +- tools/perf/builtin-stat.c | 10 +++---- tools/perf/lib/cpumap.c | 26 ++++++++-------- tools/perf/lib/evsel.c | 2 +- tools/perf/lib/include/internal/cpumap.h | 3 +- tools/perf/lib/include/perf/cpumap.h | 5 ++++ tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/cpumap.c | 6 ++-- tools/perf/tests/event_update.c | 6 ++-- tools/perf/tests/mem2node.c | 2 +- tools/perf/tests/mmap-basic.c | 4 +-- tools/perf/tests/openat-syscall-all-cpus.c | 14 ++++----- tools/perf/tests/topology.c | 4 +-- tools/perf/util/auxtrace.c | 2 +- tools/perf/util/cpumap.c | 30 +++++++++---------- tools/perf/util/cpumap.h | 2 +- tools/perf/util/cputopo.c | 2 +- tools/perf/util/evlist.c | 4 +-- tools/perf/util/evsel.c | 2 +- tools/perf/util/mmap.c | 2 +- tools/perf/util/perf_api_probe.c | 4 +-- tools/perf/util/record.c | 6 ++-- .../scripting-engines/trace-event-python.c | 2 +- tools/perf/util/session.c | 2 +- tools/perf/util/stat-display.c | 8 ++--- tools/perf/util/svghelper.c | 2 +- tools/perf/util/synthetic-events.c | 6 ++-- 35 files changed, 93 insertions(+), 87 deletions(-) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index a7526c05df38..e0cd1622aa80 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -254,7 +254,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index d1c5cb526b9f..3fdfe814bc4b 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -343,7 +343,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 21776862e940..ca1251093924 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -169,7 +169,7 @@ int bench_futex_hash(int argc, const char **argv) goto errmem; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index c54013806ba4..4ada1bdbb05f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -134,7 +134,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, worker[i].futex = &global_futex; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), 
&cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 11b7dbd37486..dedc279d3a2c 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -95,7 +95,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index c3033d0905db..84469cd7e914 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -149,7 +149,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, /* create and block all threads */ for (i = 0; i < nblocked_threads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index ad44a78392dc..3d2ff55d6841 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -101,7 +101,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(cpu->map[i % cpu->nr].cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f1280115f039..be9353e10094 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2100,12 +2100,12 @@ static int setup_nodes(struct perf_session *session) continue; for (cpu = 0; cpu < map->nr; cpu++) { - set_bit(map->map[cpu], set); + set_bit(map->map[cpu].cpu, set); - if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) + if (WARN_ONCE(cpu2node[map->map[cpu].cpu] != -1, "node/cpu topology bug")) return -EINVAL; - cpu2node[map->map[cpu]] = node; + cpu2node[map->map[cpu].cpu] = node; } } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e67a534836a7..797a7a689dbc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1943,7 +1943,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) counts = perf_counts(counter->counts, cpu, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", - counter->core.cpus->map[cpu], + counter->core.cpus->map[cpu].cpu, perf_thread_map__pid(counter->core.threads, thread), counts->val, counts->ena, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 748be8cf5a73..727b4eac6328 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -935,12 +935,12 @@ static int perf_stat__get_aggr(struct perf_stat_config *config, if (idx >= map->nr) return -1; - cpu = map->map[idx]; + cpu = map->map[idx].cpu; - if (config->cpus_aggr_map->map[cpu] == -1) - config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); + if (config->cpus_aggr_map->map[cpu].cpu == -1) + config->cpus_aggr_map->map[cpu].cpu = get_id(config, map, idx); - return config->cpus_aggr_map->map[cpu]; + return 
config->cpus_aggr_map->map[cpu].cpu; } static int perf_stat__get_socket_cached(struct perf_stat_config *config, @@ -1041,7 +1041,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *m if (idx > map->nr) return -1; - cpu = map->map[idx]; + cpu = map->map[idx].cpu; if (cpu >= env->nr_cpus_avail) return -1; diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c index 5e1ae7a23591..4a12d4368ad2 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/perf/lib/cpumap.c @@ -16,7 +16,7 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; - cpus->map[0] = -1; + cpus->map[0].cpu = -1; refcount_set(&cpus->refcnt, 1); } @@ -59,7 +59,7 @@ static struct perf_cpu_map *cpu_map__default_new(void) int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i] = i; + cpus->map[i].cpu = i; cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); @@ -234,7 +234,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { if (idx < cpus->nr) - return cpus->map[idx]; + return cpus->map[idx].cpu; return -1; } @@ -246,7 +246,7 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0] == -1 : true; + return map ? map->map[0].cpu == -1 : true; } int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) @@ -254,7 +254,7 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) int i; for (i = 0; i < cpus->nr; ++i) { - if (cpus->map[i] == cpu) + if (cpus->map[i].cpu == cpu) return i; } @@ -266,8 +266,8 @@ int perf_cpu_map__max(struct perf_cpu_map *map) int i, max = -1; for (i = 0; i < map->nr; i++) { - if (map->map[i] > max) - max = map->map[i]; + if (map->map[i].cpu > max) + max = map->map[i].cpu; } return max; @@ -309,19 +309,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, /* Standard merge algorithm from wikipedia */ i = j = k = 0; while (i < orig->nr && j < other->nr) { - if (orig->map[i] <= other->map[j]) { - if (orig->map[i] == other->map[j]) + if (orig->map[i].cpu <= other->map[j].cpu) { + if (orig->map[i].cpu == other->map[j].cpu) j++; - tmp_cpus[k++] = orig->map[i++]; + tmp_cpus[k++] = orig->map[i++].cpu; } else - tmp_cpus[k++] = other->map[j++]; + tmp_cpus[k++] = other->map[j++].cpu; } while (i < orig->nr) - tmp_cpus[k++] = orig->map[i++]; + tmp_cpus[k++] = orig->map[i++].cpu; while (j < other->nr) - tmp_cpus[k++] = other->map[j++]; + tmp_cpus[k++] = other->map[j++].cpu; assert(k <= tmp_len); merged = cpu_map__trim_new(k, tmp_cpus); diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c index ea775dacbd2d..3aefe19b7378 100644 --- a/tools/perf/lib/evsel.c +++ b/tools/perf/lib/evsel.c @@ -102,7 +102,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpus->map[cpu], -1, 0); + cpus->map[cpu].cpu, -1, 0); if (fd < 0) return -errno; diff --git a/tools/perf/lib/include/internal/cpumap.h b/tools/perf/lib/include/internal/cpumap.h index 840d4032587b..2d30168ee2fc 100644 --- a/tools/perf/lib/include/internal/cpumap.h +++ b/tools/perf/lib/include/internal/cpumap.h @@ -3,11 +3,12 @@ #define __LIBPERF_INTERNAL_CPUMAP_H #include +#include struct perf_cpu_map { refcount_t refcnt; int nr; - int map[]; + struct perf_cpu map[]; }; #ifndef MAX_NR_CPUS diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h index 6a17ad730cbc..101a0f2b7693 100644 --- 
a/tools/perf/lib/include/perf/cpumap.h +++ b/tools/perf/lib/include/perf/cpumap.h @@ -6,6 +6,11 @@ #include #include +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { + int cpu; +}; + struct perf_cpu_map; LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 96c137360918..8717e03924dd 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < map->nr; i++) - set_bit(map->map[i], bm); + set_bit(map->map[i].cpu, bm); } if (map) diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 4ac56741ac5f..da19a72d8f7a 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 20); for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", map->map[i] == i); + TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i); } perf_cpu_map__put(map); @@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", map->nr == 2); - TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); + TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256); TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); perf_cpu_map__put(map); return 0; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 195b29797acc..cf8fa4f61bed 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -76,9 +76,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); TEST_ASSERT_VAL("wrong cpus", map->nr == 3); - TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2); - TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3); + TEST_ASSERT_VAL("wrong cpus", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpus", map->map[1].cpu == 2); + TEST_ASSERT_VAL("wrong cpus", map->map[2].cpu == 3); perf_cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index a258bd51f1a4..96afd0e7599f 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -31,7 +31,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < map->nr; i++) { - set_bit(map->map[i], bm); + set_bit(map->map[i].cpu, bm); } } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 3fcd6e56ecb3..58d83dc40773 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -59,11 +59,11 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } CPU_ZERO(&cpu_set); - CPU_SET(cpus->map[0], &cpu_set); + CPU_SET(cpus->map[0].cpu, &cpu_set); sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf))); + cpus->map[0].cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_free_cpus; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c 
b/tools/perf/tests/openat-syscall-all-cpus.c index e6013b99d2c7..3601c436493b 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -66,15 +66,15 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int * without CPU_ALLOC. 1024 cpus in 2010 still seems * a reasonable upper limit tho :-) */ - if (cpus->map[cpu] >= CPU_SETSIZE) { - pr_debug("Ignoring CPU %d\n", cpus->map[cpu]); + if (cpus->map[cpu].cpu >= CPU_SETSIZE) { + pr_debug("Ignoring CPU %d\n", cpus->map[cpu].cpu); continue; } - CPU_SET(cpus->map[cpu], &cpu_set); + CPU_SET(cpus->map[cpu].cpu, &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[cpu], + cpus->map[cpu].cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } @@ -82,7 +82,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } - CPU_CLR(cpus->map[cpu], &cpu_set); + CPU_CLR(cpus->map[cpu].cpu, &cpu_set); } /* @@ -100,7 +100,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int for (cpu = 0; cpu < cpus->nr; ++cpu) { unsigned int expected; - if (cpus->map[cpu] >= CPU_SETSIZE) + if (cpus->map[cpu].cpu >= CPU_SETSIZE) continue; if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { @@ -112,7 +112,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int expected = nr_openat_calls + cpu; if (perf_counts(evsel->counts, cpu, 0)->val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); + expected, cpus->map[cpu].cpu, perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index c8af1310bd7c..a1a0e9284137 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -101,10 +101,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { TEST_ASSERT_VAL("Core ID doesn't match", - (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff))); + (session->header.env.cpu[map->map[i].cpu].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff))); TEST_ASSERT_VAL("Socket ID doesn't match", - (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL))); + (session->header.env.cpu[map->map[i].cpu].socket_id == cpu_map__get_socket(map, i, NULL))); } perf_session__delete(session); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 8c1156c8d14a..456a7e731c42 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -183,7 +183,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, mp->idx = idx; if (per_cpu) { - mp->cpu = evlist->core.cpus->map[idx]; + mp->cpu = evlist->core.cpus->map[idx].cpu; if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 324ec0456c83..575f9cc6b07a 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -33,9 +33,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) * otherwise it would become 65535. 
*/ if (cpus->cpu[i] == (u16) -1) - map->map[i] = -1; + map->map[i].cpu = -1; else - map->map[i] = (int) cpus->cpu[i]; + map->map[i].cpu = (int) cpus->cpu[i]; } } @@ -54,7 +54,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map int cpu, i = 0; for_each_set_bit(cpu, mask->mask, nbits) - map->map[i++] = cpu; + map->map[i++].cpu = cpu; } return map; @@ -87,7 +87,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = -1; + cpus->map[i].cpu = -1; refcount_set(&cpus->refcnt, 1); } @@ -118,7 +118,7 @@ int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_un if (idx > map->nr) return -1; - cpu = map->map[idx]; + cpu = map->map[idx].cpu; return cpu_map__get_socket_id(cpu); } @@ -144,11 +144,11 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res, for (cpu = 0; cpu < nr; cpu++) { s1 = f(cpus, cpu, data); for (s2 = 0; s2 < c->nr; s2++) { - if (s1 == c->map[s2]) + if (s1 == c->map[s2].cpu) break; } if (s2 == c->nr) { - c->map[c->nr] = s1; + c->map[c->nr].cpu = s1; c->nr++; } } @@ -174,7 +174,7 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) if (idx > map->nr) return -1; - cpu = map->map[idx]; + cpu = map->map[idx].cpu; die_id = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ @@ -213,7 +213,7 @@ int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) if (idx > map->nr) return -1; - cpu = map->map[idx]; + cpu = map->map[idx].cpu; cpu = cpu_map__get_core_id(cpu); @@ -464,7 +464,7 @@ bool cpu_map__has(struct perf_cpu_map *cpus, int cpu) int cpu_map__cpu(struct perf_cpu_map *cpus, int idx) { - return cpus->map[idx]; + return cpus->map[idx].cpu; } size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) @@ -478,26 +478,26 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) for (i = 0; i < map->nr + 1; i++) { bool last = i == map->nr; - cpu = last ? INT_MAX : map->map[i]; + cpu = last ? 
INT_MAX : map->map[i].cpu; if (start == -1) { start = i; if (last) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[i]); + map->map[i].cpu); } - } else if (((i - start) != (cpu - map->map[start])) || last) { + } else if (((i - start) != (cpu - map->map[start].cpu)) || last) { int end = i - 1; if (start == end) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[start]); + map->map[start].cpu); } else { ret += snprintf(buf + ret, size - ret, "%s%d-%d", COMMA, - map->map[start], map->map[end]); + map->map[start].cpu, map->map[end].cpu); } first = false; start = i; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index dbc1d7e949ed..9401e360f980 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -29,7 +29,7 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) { if (!sock || s > sock->nr || s < 0) return 0; - return sock->map[s]; + return sock->map[s].cpu; } static inline int cpu_map__id_to_socket(int id) diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index ec77e2a7b3ca..f0b54cafedb5 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -329,7 +329,7 @@ struct numa_topology *numa_topology__new(void) tp->nr = nr; for (i = 0; i < nr; i++) { - if (load_numa_node(&tp->nodes[i], node_map->map[i])) { + if (load_numa_node(&tp->nodes[i], node_map->map[i].cpu)) { numa_topology__delete(tp); tp = NULL; break; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 84c252d28eb6..a4dfa1162db9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -342,7 +342,7 @@ bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu) { if (ev->cpu_iter >= ev->core.cpus->nr) return true; - if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu) + if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter].cpu != cpu) return true; return false; } @@ -465,7 +465,7 @@ static void perf_evlist__set_sid_idx(struct evlist *evlist, struct perf_sample_id *sid = SID(evsel, cpu, thread); sid->idx = idx; if (evlist->core.cpus && cpu >= 0) - sid->cpu = evlist->core.cpus->map[cpu]; + sid->cpu = evlist->core.cpus->map[cpu].cpu; else sid->cpu = -1; if (!evsel->core.system_wide && evlist->core.threads && thread >= 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2a84926c3963..e09ce6f7ced0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1713,7 +1713,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, retry_open: test_attr__ready(); - fd = perf_event_open(evsel, pid, cpus->map[cpu], + fd = perf_event_open(evsel, pid, cpus->map[cpu].cpu, group_fd, flags); FD(evsel, cpu, thread) = fd; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 2a8bf0ab861c..d3a8e8c5464e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -226,7 +226,7 @@ static void build_node_mask(int node, cpu_set_t *mask) nr_cpus = perf_cpu_map__nr(cpu_map); for (c = 0; c < nr_cpus; c++) { - cpu = cpu_map->map[c]; /* map c index to online cpu index */ + cpu = cpu_map->map[c].cpu; /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) CPU_SET(cpu, mask); } diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 1337965673d7..5d30a86fc060 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -66,7 +66,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - cpu = cpus->map[0]; + cpu = 
cpus->map[0].cpu; perf_cpu_map__put(cpus); do { @@ -121,7 +121,7 @@ bool perf_can_record_cpu_wide(void) cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - cpu = cpus->map[0]; + cpu = cpus->map[0].cpu; perf_cpu_map__put(cpus); fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 24e3c867d3a7..b43d26bea509 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -29,7 +29,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, if (opts->group) perf_evlist__set_leader(evlist); - if (evlist->core.cpus->map[0] < 0) + if (evlist->core.cpus->map[0].cpu < 0) opts->no_inherit = true; use_comm_exec = perf_can_comm_exec(); @@ -159,10 +159,10 @@ bool perf_evlist__can_select_event(struct evlist *evlist, const char *str) if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); - cpu = cpus ? cpus->map[0] : 0; + cpu = cpus ? cpus->map[0].cpu : 0; perf_cpu_map__put(cpus); } else { - cpu = evlist->core.cpus->map[0]; + cpu = evlist->core.cpus->map[0].cpu; } while (1) { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 2bdd10c4c246..8db3340c2805 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1406,7 +1406,7 @@ static void python_process_stat(struct perf_stat_config *config, for (thread = 0; thread < threads->nr; thread++) { for (cpu = 0; cpu < cpus->nr; cpu++) { - process_stat(counter, cpus->map[cpu], + process_stat(counter, cpus->map[cpu].cpu, perf_thread_map__pid(threads, thread), tstamp, perf_counts(counter->counts, cpu, thread)); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9c746b845f3f..6666f80ea31d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2384,7 +2384,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, } for (i = 0; i < map->nr; i++) { - int cpu = map->map[i]; + int cpu = map->map[i].cpu; if (cpu >= nr_cpus) { pr_err("Requested CPU %d too large. " diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index e18c26501a7f..25f3694f29ba 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config, } else { fprintf(config->output, "CPU%*d%s ", config->csv_output ? 
0 : -5, - evsel__cpus(evsel)->map[id], + evsel__cpus(evsel)->map[id].cpu, config->csv_sep); } break; @@ -323,7 +323,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, return 0; for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { - int cpu2 = evsel__cpus(evsel)->map[i]; + int cpu2 = evsel__cpus(evsel)->map[i].cpu; if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) return cpu2; @@ -494,7 +494,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, struct evsel *counter; for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; + id = config->aggr_map->map[s].cpu; evlist__for_each_entry(evlist, counter) { val = 0; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { @@ -624,7 +624,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, int id, nr; double uval; - ad.id = id = config->aggr_map->map[s]; + ad.id = id = config->aggr_map->map[s].cpu; ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(config, counter, aggr_cb, &ad)) diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96f941e01681..660702eb4d79 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -735,7 +735,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) return -1; for (i = 0; i < m->nr; i++) { - c = m->map[i]; + c = m->map[i].cpu; if (c >= nr_cpus) { ret = -1; break; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index b18cf5d1af44..bdbc83ecd3e0 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -925,7 +925,7 @@ static void synthesize_cpus(struct cpu_map_entries *cpus, cpus->nr = map->nr; for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i]; + cpus->cpu[i] = map->map[i].cpu; } static void synthesize_mask(struct perf_record_record_cpu_map *mask, @@ -937,7 +937,7 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask, mask->long_size = sizeof(long); for (i = 0; i < map->nr; i++) - set_bit(map->map[i], mask->mask); + set_bit(map->map[i].cpu, mask->mask); } static size_t cpus_size(struct perf_cpu_map *map) @@ -953,7 +953,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) for (i = 0; i < map->nr; i++) { /* bit possition of the cpu is + 1 */ - int bit = map->map[i] + 1; + int bit = map->map[i].cpu + 1; if (bit > *max) *max = bit; -- Gitee From 5130a405d0097f9b897c9382f837185dcbbc394d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 28 Mar 2022 16:26:45 -0700 Subject: [PATCH 152/173] perf cpumap: Add is_subset function [Upstream commit c3ad8d23bc0e8cacd8ea2e6615b53c6a7811cb66] Returns true if the second argument is a subset of the first. 
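A hypothetical usage sketch (the map contents are illustrative; the implementation relies on both maps being sorted):

	struct perf_cpu_map *online = perf_cpu_map__new(NULL);	/* e.g. CPUs 0-7 */
	struct perf_cpu_map *wanted = perf_cpu_map__new("2,3");

	/* true: every CPU in 'wanted' also appears in 'online' */
	bool ok = perf_cpu_map__is_subset(online, wanted);
	/* false: 'online' contains CPUs that 'wanted' lacks */
	bool no = perf_cpu_map__is_subset(wanted, online);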
Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Kajol Jain Cc: Leo Yan Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Song Liu Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Will Deacon Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20220328232648.2127340-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/lib/cpumap.c | 20 ++++++++++++++++++++ tools/perf/lib/include/internal/cpumap.h | 1 + 2 files changed, 21 insertions(+) diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c index 4a12d4368ad2..ec73bcc9286b 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/perf/lib/cpumap.c @@ -329,3 +329,23 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, perf_cpu_map__put(orig); return merged; } + +/** Is 'b' a subset of 'a'. */ +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b) +{ + if (a == b || !b) + return true; + if (!a || b->nr > a->nr) + return false; + + for (int i = 0, j = 0; i < a->nr; i++) { + if (a->map[i].cpu > b->map[j].cpu) + return false; + if (a->map[i].cpu == b->map[j].cpu) { + j++; + if (j == b->nr) + return true; + } + } + return false; +} diff --git a/tools/perf/lib/include/internal/cpumap.h b/tools/perf/lib/include/internal/cpumap.h index 2d30168ee2fc..3e31d7bc385d 100644 --- a/tools/perf/lib/include/internal/cpumap.h +++ b/tools/perf/lib/include/internal/cpumap.h @@ -16,5 +16,6 @@ struct perf_cpu_map { #endif int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ -- Gitee From 2564db7dc91c9d7164ab2963919d32979a717c9a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 May 2023 14:53:36 -0700 Subject: [PATCH 153/173] perf cpumap: Add intersect function [Upstream commit 237d41d4a2d7d45e41f5450636d1cf689cba0e8a] The merge function gives the union of two cpu maps. Add an intersect function which is necessary, for example, when intersecting a PMU's supported CPUs with those the user requested.
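For example (a sketch, not part of this patch; 'user_cpus' is an illustrative variable), the intersection keeps only the CPUs present in both maps:

	/* pmu->cpus = {0-3}, user_cpus = {2-5} => usable = {2,3} */
	struct perf_cpu_map *usable = perf_cpu_map__intersect(pmu->cpus, user_cpus);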
Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Athira Rajeev Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kang Minchul Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Mike Leach Cc: Ming Wang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Sean Christopherson Cc: Suzuki Poulouse Cc: Thomas Richter Cc: Will Deacon Cc: Xing Zhengjun Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230526215410.2435674-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/lib/cpumap.c | 35 ++++++++++++++++++++++++++++ tools/perf/lib/include/perf/cpumap.h | 2 ++ 2 files changed, 37 insertions(+) diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c index ec73bcc9286b..04ee9825b572 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/perf/lib/cpumap.c @@ -349,3 +349,38 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu } return false; } + +struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other) +{ + struct perf_cpu *tmp_cpus; + int tmp_len; + int i, j, k; + struct perf_cpu_map *merged = NULL; + + if (perf_cpu_map__is_subset(other, orig)) + return perf_cpu_map__get(orig); + if (perf_cpu_map__is_subset(orig, other)) + return perf_cpu_map__get(other); + + tmp_len = max(orig->nr, other->nr); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); + if (!tmp_cpus) + return NULL; + + i = j = k = 0; + while (i < orig->nr && j < other->nr) { + if (orig->map[i].cpu < other->map[j].cpu) + i++; + else if (orig->map[i].cpu > other->map[j].cpu) + j++; + else { + j++; + tmp_cpus[k++] = orig->map[i++]; + } + } + if (k) + merged = cpu_map__trim_new(k, &tmp_cpus->cpu); + free(tmp_cpus); + return merged; +} diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h index 101a0f2b7693..98e785dedb7e 100644 --- a/tools/perf/lib/include/perf/cpumap.h +++ b/tools/perf/lib/include/perf/cpumap.h @@ -19,6 +19,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); -- Gitee From 299f161a236d2305ccfc8193474067717e1e7d26 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 27 May 2023 00:21:43 -0700 Subject: [PATCH 154/173] perf pmu: Add CPU map for "cpu" PMUs [Upstream commit a0c41caebab2fa224454d50dd4e29ae008ead25f] A typical "cpu" PMU has no "cpus" or "cpumask" file meaning the CPU map is set to NULL, which also encodes an empty CPU map. Update pmu_cpumask so that if the "cpu" PMU fails to load a CPU map, it uses a default of all online CPUs. Remove const from cpu_map__online for the sake of reference counting.
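The intended fallback, paraphrased rather than quoted from the upstream hunk (which is not part of this backport), looks roughly like this inside pmu_lookup():

	/* Sketch only: a core PMU without a cpumask file covers all online CPUs. */
	pmu->cpus = pmu_cpumask(name);
	if (!pmu->cpus && !strcmp(name, "cpu"))
		pmu->cpus = perf_cpu_map__get(cpu_map__online());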
Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Athira Rajeev Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kang Minchul Cc: Leo Yan Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Mike Leach Cc: Ming Wang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Sean Christopherson Cc: Suzuki Poulouse Cc: Thomas Richter Cc: Will Deacon Cc: Xing Zhengjun Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230527072210.2900565-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: huwentao --- tools/perf/util/cpumap.c | 4 ++-- tools/perf/util/cpumap.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 575f9cc6b07a..3635563d5755 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -559,9 +559,9 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) return ptr - buf; } -const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ +struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ { - static const struct perf_cpu_map *online = NULL; + static struct perf_cpu_map *online; if (!online) online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 9401e360f980..adb6b66172f8 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -23,7 +23,7 @@ int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp); int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep); int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep); -const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ +struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) { -- Gitee From 69d2c7674dd8884ec19de47e7210fbcf9b11800f Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 19 Sep 2024 14:25:17 +0800 Subject: [PATCH 155/173] arm: pmu: Share user ABI format mechanism with SPE [Upstream commit f6da86969a3c284466ab6080764b2ed91689f262] This mechanism makes it much easier to define and read new attributes so move it to the arm_pmu.h header so that it can be shared. At the same time update the existing format attributes to use it. GENMASK has to be changed to GENMASK_ULL because the config fields are 64 bits even on arm32 where this will also be used now. 
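With the macros shared from arm_pmu.h, adding a new user-visible field takes only a few lines. A sketch with a made-up field "foo" occupying config:16-17 (this field is not defined by any patch here):

	#define ATTR_CFG_FLD_foo_CFG	config
	#define ATTR_CFG_FLD_foo_LO	16
	#define ATTR_CFG_FLD_foo_HI	17

	GEN_PMU_FORMAT_ATTR(foo);	/* sysfs format file reads "config:16-17" */

	static u64 event_get_foo(struct perf_event *event)
	{
		return ATTR_CFG_GET_FLD(&event->attr, foo);
	}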
Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-7-james.clark@arm.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/kernel/perf_event.c | 15 ++++++++++++--- drivers/perf/arm_spe_pmu.c | 22 ---------------------- include/linux/perf/arm_pmu.h | 22 ++++++++++++++++++++++ 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index dc3c99b304d0..33598c861f90 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -324,12 +324,21 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { .is_visible = armv8pmu_event_attr_is_visible, }; -PMU_FORMAT_ATTR(event, "config:0-15"); -PMU_FORMAT_ATTR(long, "config1:0"); +/* User ABI */ +#define ATTR_CFG_FLD_event_CFG config +#define ATTR_CFG_FLD_event_LO 0 +#define ATTR_CFG_FLD_event_HI 15 +#define ATTR_CFG_FLD_long_CFG config1 +#define ATTR_CFG_FLD_long_LO 0 +#define ATTR_CFG_FLD_long_HI 0 + +GEN_PMU_FORMAT_ATTR(event); +GEN_PMU_FORMAT_ATTR(long); + static inline bool armv8pmu_event_is_64bit(struct perf_event *event) { - return event->attr.config1 & 0x1; + return ATTR_CFG_GET_FLD(&event->attr, long); } static struct attribute *armv8_pmuv3_format_attrs[] = { diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index a462602db408..8984263c8857 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -182,28 +182,6 @@ static struct attribute_group arm_spe_pmu_cap_group = { #define ATTR_CFG_FLD_min_latency_LO 0 #define ATTR_CFG_FLD_min_latency_HI 11 -/* Why does everything I do descend into this? */ -#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ - (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi - -#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ - __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) - -#define GEN_PMU_FORMAT_ATTR(name) \ - PMU_FORMAT_ATTR(name, \ - _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ - ATTR_CFG_FLD_##name##_LO, \ - ATTR_CFG_FLD_##name##_HI)) - -#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ - ((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0)) - -#define ATTR_CFG_GET_FLD(attr, name) \ - _ATTR_CFG_GET_FLD(attr, \ - ATTR_CFG_FLD_##name##_CFG, \ - ATTR_CFG_FLD_##name##_LO, \ - ATTR_CFG_FLD_##name##_HI) - GEN_PMU_FORMAT_ATTR(ts_enable); GEN_PMU_FORMAT_ATTR(pa_enable); GEN_PMU_FORMAT_ATTR(pct_enable); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 98a43a577e8c..1fc38e6ac075 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -202,4 +202,26 @@ void armpmu_free_irq(int irq, int cpu); #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" +/* Why does everything I do descend into this? */ +#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + (lo) == (hi) ? 
#cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi + +#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \ + __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) + +#define GEN_PMU_FORMAT_ATTR(name) \ + PMU_FORMAT_ATTR(name, \ + _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI)) + +#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \ + ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0)) + +#define ATTR_CFG_GET_FLD(attr, name) \ + _ATTR_CFG_GET_FLD(attr, \ + ATTR_CFG_FLD_##name##_CFG, \ + ATTR_CFG_FLD_##name##_LO, \ + ATTR_CFG_FLD_##name##_HI) + #endif /* __ARM_PMU_H__ */ -- Gitee From 48859104826a937d82cf2a39afdfad3d9fa388e8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 6 Jun 2024 23:53:43 -0700 Subject: [PATCH 156/173] perf arm: Workaround ARM PMUs cpu maps having offline cpus [Upstream commit 5518063fcb2e022411c1112b1b9b069e68380bbb] When PMUs have a cpu map in the 'cpus' or 'cpumask' file, perf will try to open events on those CPUs. ARM doesn't remove offline CPUs meaning taking a CPU offline will cause perf commands to fail unless a CPU map is passed on the command line. More context in: https://lore.kernel.org/lkml/20240603092812.46616-1-yangyicong@huawei.com/ Reported-by: Yicong Yang Closes: https://lore.kernel.org/lkml/20240603092812.46616-2-yangyicong@huawei.com/ Signed-off-by: Ian Rogers Tested-by: Yicong Yang Tested-by: Leo Yan Cc: James Clark Cc: Suzuki K Poulose Cc: Will Deacon Cc: Mike Leach Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Cc: John Garry Signed-off-by: Namhyung Kim Signed-off-by: Qizhi Zhang Link: https://lore.kernel.org/r/20240607065343.695369-1-irogers@google.com Signed-off-by: huwentao --- tools/perf/arch/arm/util/pmu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index fd2b1df4a877..4121f2b45b2d 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -11,12 +11,15 @@ #include "arm-spe.h" #include "hisi-ptt.h" +#include "../../../util/cpumap.h" #include "../../../util/pmu.h" struct perf_event_attr -*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +*perf_pmu__get_default_config(struct perf_pmu *pmu) { + struct perf_cpu_map *intersect; + #ifdef HAVE_AUXTRACE_SUPPORT if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { /* add ETM default config here */ @@ -30,5 +33,10 @@ struct perf_event_attr } #endif + /* Workaround some ARM PMU's failing to correctly set CPU maps for online processors. */ + intersect = perf_cpu_map__intersect(cpu_map__online(), pmu->cpus); + perf_cpu_map__put(pmu->cpus); + pmu->cpus = intersect; + return NULL; } -- Gitee From 2ad1a3632cf2769e488dd0687f91bc32003e1a53 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:41 +0800 Subject: [PATCH 157/173] drivers/perf: hisi: Relax the event ID check in the framework Event ID is only using the attr::config bit [7, 0] but we check the event range using the whole 64bit field. It blocks the usage of the resident field of attr::config. Relax the check by only using the bit [7, 0]. 
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 8a5263d2103e..59395df44878 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -248,7 +248,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event)
 		return -EINVAL;
 
 	hisi_pmu = to_hisi_pmu(event->pmu);
-	if (event->attr.config > hisi_pmu->check_event)
+	if ((event->attr.config & HISI_EVENTID_MASK) > hisi_pmu->check_event)
 		return -EINVAL;
 
 	if (hisi_pmu->on_cpu == -1)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 8fc983c83008..d3da533c8bf4 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -42,7 +42,8 @@
 		return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \
 	}
 
-#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
+#define HISI_EVENTID_MASK 0xff
+#define HISI_GET_EVENTID(ev) (ev->hw.config_base & HISI_EVENTID_MASK)
 
 #define HISI_PMU_EVTYPE_BITS 8
 #define HISI_PMU_EVTYPE_SHIFT(idx) ((idx) % 4 * HISI_PMU_EVTYPE_BITS)
-- 
Gitee

From a13184374835d2658670f02145c8c064b5946504 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Wed, 16 Apr 2025 15:04:42 +0800
Subject: [PATCH 158/173] drivers/perf: hisi: Export hisi_uncore_pmu_isr()

Currently the Uncore PMU framework assumes one PMU device has only one
interrupt and registers the interrupt handler on the driver's behalf,
so it cannot support a PMU with multiple interrupt resources. However,
the interrupt handling for such a PMU may be identical for every
interrupt and could share one handler. Export hisi_uncore_pmu_isr() to
allow drivers to register the IRQ handler through their own routine.
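For example, a driver owning a second interrupt line could then reuse the
shared handler along these lines (a sketch based on the L3C PMU v3 patch
later in this series; error handling trimmed, pdev and hisi_pmu assumed
to be in scope):

	int irq = platform_get_irq(pdev, 1);

	if (irq < 0)
		return irq;

	/* Reuse the framework's ISR for the extra interrupt resource */
	return devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
				IRQF_NOBALANCING | IRQF_NO_THREAD,
				dev_name(&pdev->dev), hisi_pmu);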
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 3 ++-
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 59395df44878..5ea53686a528 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -163,7 +163,7 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
 	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
 }
 
-static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
 {
 	struct hisi_pmu *hisi_pmu = data;
 	struct perf_event *event;
@@ -192,6 +192,7 @@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
 
 	return IRQ_HANDLED;
 }
+EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, HISI_PMU);
 
 int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
 			     struct platform_device *pdev)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index d3da533c8bf4..c6d9346eb30d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -166,6 +166,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
 ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
 					     struct device_attribute *attr,
 					     char *page);
+irqreturn_t hisi_uncore_pmu_isr(int irq, void *data);
 int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
 			     struct platform_device *pdev);
 void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev);
-- 
Gitee

From 4aa96ef1c9d82d38ba3117289f365215f0aac8be Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Wed, 16 Apr 2025 15:04:43 +0800
Subject: [PATCH 159/173] drivers/perf: hisi: Simplify the probe process of
 each L3C PMU version

Versions 1 and 2 of the L3C PMU also use different HIDs. Make use of
struct acpi_device_id::driver_data for the version-specific information
rather than judging by the version register. This helps to simplify the
probe process and also makes future extension a bit easier.
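With the version information carried in driver_data, supporting a future
variant mostly reduces to one new dev_info plus one match-table entry, e.g.
(hypothetical HID and attribute group; the L3C PMU v3 patch later in this
series follows exactly this pattern):

	static const struct hisi_pmu_dev_info hisi_l3c_pmu_vN = {
		.attr_groups = hisi_l3c_pmu_vN_attr_groups,	/* hypothetical */
		.counter_bits = 64,
		.check_event = L3C_V2_NR_EVENTS,
	};

	static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
		{ "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 },
		{ "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 },
		{ "HISIxxxx", (kernel_ulong_t)&hisi_l3c_pmu_vN },	/* hypothetical */
		{}
	};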
Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 43 ++++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 5fc9045c31f1..cf8331b45e33 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -345,13 +345,6 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); } -static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { - { "HISI0213", }, - { "HISI0214", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); - static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { @@ -372,6 +365,10 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + l3c_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!l3c_pmu->dev_info) + return -ENODEV; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(l3c_pmu->base)) { @@ -459,6 +456,18 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { + .attr_groups = hisi_l3c_pmu_v1_attr_groups, + .counter_bits = 48, + .check_event = L3C_V1_NR_EVENTS, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { + .attr_groups = hisi_l3c_pmu_v2_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -489,16 +498,9 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (l3c_pmu->identifier >= HISI_PMU_V2) { - l3c_pmu->counter_bits = 64; - l3c_pmu->check_event = L3C_V2_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups; - } else { - l3c_pmu->counter_bits = 48; - l3c_pmu->check_event = L3C_V1_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups; - } - + l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups; + l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits; + l3c_pmu->check_event = l3c_pmu->dev_info->check_event; l3c_pmu->num_counters = L3C_NR_COUNTERS; l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; @@ -564,6 +566,13 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { + { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, + { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); + static struct platform_driver hisi_l3c_pmu_driver = { .driver = { .name = "hisi_l3c_pmu", -- Gitee From beedef1d27b085602113480a3f52b202a1d9c463 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:44 +0800 Subject: [PATCH 160/173] drivers/perf: hisi: Extract the event filter check of L3C PMU L3C PMU has 4 filter options which are sharing perf_event_attr::config1. Driver will check config1 to see whether a certain event has a filter setting. It'll be incorrect if we make use of other bits in config1 for non-filter options. So check whether each filter options are set directly in a separate function instead. 
Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index cf8331b45e33..97bc7546a461 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -204,9 +204,15 @@ static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
 	}
 }
 
+static bool hisi_l3c_pmu_have_filter(struct perf_event *event)
+{
+	return hisi_get_tt_req(event) || hisi_get_tt_core(event) ||
+	       hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event);
+}
+
 static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
 {
-	if (event->attr.config1 != 0x0) {
+	if (hisi_l3c_pmu_have_filter(event)) {
 		hisi_l3c_pmu_config_req_tracetag(event);
 		hisi_l3c_pmu_config_core_tracetag(event);
 		hisi_l3c_pmu_config_ds(event);
@@ -215,7 +221,7 @@
 
 static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
 {
-	if (event->attr.config1 != 0x0) {
+	if (hisi_l3c_pmu_have_filter(event)) {
 		hisi_l3c_pmu_clear_ds(event);
 		hisi_l3c_pmu_clear_core_tracetag(event);
 		hisi_l3c_pmu_clear_req_tracetag(event);
-- 
Gitee

From 622872a8176a0f16569ff8efc01b51d6e8e02baf Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Wed, 16 Apr 2025 15:04:45 +0800
Subject: [PATCH 161/173] drivers/perf: hisi: Extend the field of tt_core

Currently tt_core uses config1's bits [7, 0] and cannot be extended.
On some platforms there are more than 8 CPUs sharing the L3 cache, so
make tt_core use config2's bits [15, 0]; the remaining bits in config2
are reserved for future extension.

Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 97bc7546a461..649099bdce85 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -55,10 +55,10 @@
 #define L3C_V1_NR_EVENTS	0x59
 #define L3C_V2_NR_EVENTS	0xFF
 
-HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0);
 
 static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
 {
@@ -399,7 +399,7 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
 
 static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
 	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
-	HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+	HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
 	HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
 	HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
 	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
-- 
Gitee

From 74ae1bb3c8e1229ed2b986d4a5b53549d7b13109 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Wed, 16 Apr 2025 15:04:46 +0800
Subject: [PATCH 162/173] drivers/perf: hisi: Refactor the event configuration
 of L3C PMU

The event register is configured using hisi_pmu::base directly since
only one address space is supported for the L3C PMU. It'll be hard to
extend if the event configuration is located in a different address
space.
In order to make preparation for such hardware, extract the event register configuration to separate function using hw_perf_event::event_base as each event's base address. Implement a private hisi_uncore_ops::get_event_idx() callback for initialize the event_base besides get the hardware index. No functional changes intended. Signed-off-by: Yicong Yang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 128 ++++++++++++------- 1 file changed, 83 insertions(+), 45 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 649099bdce85..38ee8a1afe2d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -60,51 +60,86 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); -static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + u32 num_counters = l3c_pmu->num_counters; + int idx; + + idx = find_first_zero_bit(used_mask, num_counters); + if (idx == num_counters) + return -EAGAIN; + + set_bit(idx, used_mask); + event->hw.event_base = (unsigned long)l3c_pmu->base; + return idx; +} + +static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg) +{ + return readl((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val) +{ + writel(val, (void __iomem *)hwc->event_base + reg); +} + +static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg) +{ + return readq((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val) +{ + writeq(val, (void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Set request-type for tracetag */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= tt_req << L3C_TRACETAG_REQ_SHIFT; val |= L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Enable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Clear request-type */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); val &= ~L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Disable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + 
hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; int idx = hwc->idx; @@ -120,15 +155,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) reg_idx = idx % 4; shift = 8 * reg_idx; - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_DATSRC_MASK << shift); val |= ds_cfg << shift; - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_config_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -138,15 +173,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val |= L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_clear_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -156,51 +191,51 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val &= ~L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 core = hisi_get_tt_core(event); if (core) { u32 val; /* Config and enable core information */ - writel(core, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Enable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 core = hisi_get_tt_core(event); if (core) { u32 val; /* Clear core information */ - writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Disable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, 
val); } } @@ -239,18 +274,19 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val); } static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, u32 type) { + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; /* @@ -265,10 +301,10 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, shift = 8 * reg_idx; /* Write event code to L3C_EVENT_TYPEx Register */ - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_EVTYPE_NONE << shift); val |= (type << shift); - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) @@ -303,9 +339,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Enable counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, @@ -314,9 +350,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Clear counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, @@ -324,10 +360,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, @@ -335,10 +371,10 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) @@ -348,7 +384,9 @@ static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { - writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; + + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << idx); } static int hisi_l3c_pmu_init_data(struct platform_device *pdev, @@ -476,7 +514,7 @@ static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { 
	.write_evtype = hisi_l3c_pmu_write_evtype,
-	.get_event_idx = hisi_uncore_pmu_get_event_idx,
+	.get_event_idx = hisi_l3c_pmu_get_event_idx,
 	.start_counters = hisi_l3c_pmu_start_counters,
 	.stop_counters = hisi_l3c_pmu_stop_counters,
 	.enable_counter = hisi_l3c_pmu_enable_counter,
-- 
Gitee

From 2aa11b557a042be1497c94ebcf8cabc964aaef47 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Wed, 16 Apr 2025 15:04:47 +0800
Subject: [PATCH 163/173] drivers/perf: hisi: Add support for L3C PMU v3

This patch adds support for L3C PMU v3. The v3 L3C PMU supports an
extended event space which is controlled in a second address space
with a separate overflow interrupt. The offsets of the control/event
registers stay the same. The extended events, together with the
original ones, cover the monitoring of all the L3C transactions.

An extended event is specified with the `ext[=1]` option so that the
driver can distinguish it, like:

  perf stat -e hisi_sccl0_l3c0_0/event=,ext/

Currently only the event option uses config bits [7, 0], so there are
still a lot of unused fields. Make ext use config bit 16 and reserve
bits [15, 8] for the event option for future extension.

The hw_perf_event::event_base is initialized to the base MMIO address
of the event and will be used for later control, overflow handling and
count readout.

We still make use of the Uncore PMU framework for handling the events
and interrupt migration on CPU hotplug. The framework's cpuhp callback
will handle the event migration and the interrupt migration of the
original events; if the PMU supports extended events, the interrupt of
the extended events is migrated to the same CPU chosen by the
framework.

A new HID of HISI0215 is used for this version of L3C PMU.

Signed-off-by: Yicong Yang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 350 +++++++++++++++++--
 1 file changed, 328 insertions(+), 22 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 38ee8a1afe2d..20855322aeda 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -55,24 +55,73 @@
 #define L3C_V1_NR_EVENTS	0x59
 #define L3C_V2_NR_EVENTS	0xFF
 
+HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 16, 16);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0);
 
+struct hisi_l3c_pmu {
+	struct hisi_pmu l3c_pmu;
+	unsigned long feature;
+#define L3C_PMU_FEAT_EXT	0x1
+
+	/* MMIO and IRQ resources for extension events */
+	void __iomem *ext_base;
+	int ext_irq;
+};
+
+#define to_hisi_l3c_pmu(_l3c_pmu) \
+	container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu)
+
+/*
+ * The hardware counter idx used in counter enable/disable,
+ * interrupt enable/disable and status check, etc.
+ */
+#define L3C_HW_IDX(_idx)	((_idx) % L3C_NR_COUNTERS)
+
 static int hisi_l3c_pmu_get_event_idx(struct perf_event *event)
 {
 	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
 	unsigned long *used_mask = l3c_pmu->pmu_events.used_mask;
 	u32 num_counters = l3c_pmu->num_counters;
+	struct hisi_l3c_pmu *hisi_l3c_pmu;
 	int idx;
 
-	idx = find_first_zero_bit(used_mask, num_counters);
+	hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu);
+
+	/*
+	 * For an L3C PMU that supports extension events, we can monitor
+	 * at most 2 * num_counters events. Thus use bits [0, num_counters - 1]
+	 * for normal events and bits [num_counters, 2 * num_counters - 1] for
+	 * extension events.
The idx allocation will keep unchanged for normal + * events and we can also use the idx to distinguish whether it's an + * extension event or not. + * + * Since normal events and extension events locates on the different + * address space, save the base address to the event->hw.event_base. + */ + if (hisi_get_ext(event)) { + if (!(hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT)) + return -EOPNOTSUPP; + + event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base; + idx = find_next_zero_bit(used_mask, num_counters, L3C_NR_COUNTERS); + } else { + event->hw.event_base = (unsigned long)l3c_pmu->base; + idx = find_next_zero_bit(used_mask, L3C_NR_COUNTERS, 0); + if (idx == L3C_NR_COUNTERS) + idx = num_counters; + } + if (idx == num_counters) return -EAGAIN; set_bit(idx, used_mask); - event->hw.event_base = (unsigned long)l3c_pmu->base; + + WARN_ON(hisi_get_ext(event) && idx < L3C_NR_COUNTERS); + WARN_ON(!hisi_get_ext(event) && idx >= L3C_NR_COUNTERS); + return idx; } @@ -142,7 +191,7 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; - int idx = hwc->idx; + int idx = L3C_HW_IDX(hwc->idx); /* * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1). @@ -268,7 +317,7 @@ static void hisi_l3c_pmu_disable_filter(struct perf_event *event) */ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) { - return (L3C_CNTR0_LOWER + (cntr_idx * 8)); + return (L3C_CNTR0_LOWER + (L3C_HW_IDX(cntr_idx) * 8)); } static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, @@ -289,6 +338,8 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; + idx = L3C_HW_IDX(idx); + /* * Select the appropriate event select register(L3C_EVENT_TYPE0/1). * There are 2 event select registers for the 8 hardware counters. @@ -309,28 +360,48 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; u32 val; /* * Set perf_enable bit in L3C_PERF_CTRL register to start counting * for all enabled counters. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val |= L3C_PERF_CTRL_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (find_first_bit(used_mask, l3c_pmu->num_counters) < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + if (find_next_bit(used_mask, l3c_pmu->num_counters, L3C_NR_COUNTERS) != l3c_pmu->num_counters) { + val = readl(hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; u32 val; /* * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting * for all enabled counters. 
*/ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val &= ~(L3C_PERF_CTRL_EN); - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (find_first_bit(used_mask, l3c_pmu->num_counters) < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~(L3C_PERF_CTRL_EN); + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + if (find_next_bit(used_mask, l3c_pmu->num_counters, L3C_NR_COUNTERS) != l3c_pmu->num_counters) { + val = readl(hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + val &= ~(L3C_PERF_CTRL_EN); + writel(val, hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, @@ -340,7 +411,7 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, /* Enable counter index in L3C_EVENT_CTRL register */ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); - val |= (1 << hwc->idx); + val |= (1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } @@ -351,7 +422,7 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, /* Clear counter index in L3C_EVENT_CTRL register */ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); - val &= ~(1 << hwc->idx); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } @@ -362,7 +433,7 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } @@ -373,20 +444,27 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ - val |= (1 << hwc->idx); + val |= (1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) { - return readl(l3c_pmu->base + L3C_INT_STATUS); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + u32 status, status_ext = 0; + + status = readl(l3c_pmu->base + L3C_INT_STATUS); + if (hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT) + status_ext = readl(hisi_l3c_pmu->ext_base + L3C_INT_STATUS); + + return status | (status_ext << L3C_NR_COUNTERS); } static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; - hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << idx); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx)); } static int hisi_l3c_pmu_init_data(struct platform_device *pdev, @@ -425,6 +503,41 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return 0; } +static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev) +{ + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + char *irqname; + int ret, irq; + + hisi_l3c_pmu->ext_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(hisi_l3c_pmu->ext_base)) + return PTR_ERR(hisi_l3c_pmu->ext_base); + + irq = platform_get_irq(pdev, 1); + /* + * We may don't need to handle -EPROBDEFER since we should have already + * handle it when probling irq[0]. 
+ */ + if (irq < 0) + return irq; + + irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext", dev_name(&pdev->dev)); + if (!irqname) + return -ENOMEM; + + ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, + irqname, l3c_pmu); + if (ret < 0) { + dev_err(&pdev->dev, + "Fail to request EXT IRQ: %d ret: %d.\n", irq, ret); + return ret; + } + + hisi_l3c_pmu->ext_irq = irq; + return 0; +} + static struct attribute *hisi_l3c_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, @@ -449,6 +562,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = { .attrs = hisi_l3c_pmu_v2_format_attr, }; +static struct attribute *hisi_l3c_pmu_v3_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ext, "config:16"), + HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v3_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_v3_format_attr, +}; + static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), @@ -484,6 +610,105 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; +struct hisi_l3c_pmu_v3_event { + unsigned long event_id; + bool ext; +}; + +static ssize_t hisi_l3c_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct hisi_l3c_pmu_v3_event *event; + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + if (!event->ext) + return sysfs_emit(page, "event=0x%lx\n", event->event_id); + else + return sysfs_emit(page, "event=0x%lx,ext=1\n", event->event_id); +} + +#define HISI_L3C_PMU_EVENT_ATTR(_name, _event, _ext) \ +static struct hisi_l3c_pmu_v3_event hisi_l3c_##_name = { _event, _ext }; \ +static struct dev_ext_attribute hisi_l3c_##_name##_attr = \ + { __ATTR(_name, 0444, hisi_l3c_pmu_event_show, NULL), (void *) &hisi_l3c_##_name } + +HISI_L3C_PMU_EVENT_ATTR(rd_cpipe, 0x00, true); +HISI_L3C_PMU_EVENT_ATTR(rd_hit_cpipe, 0x01, true); +HISI_L3C_PMU_EVENT_ATTR(wr_cpipe, 0x02, true); +HISI_L3C_PMU_EVENT_ATTR(wr_hit_cpipe, 0x03, true); +HISI_L3C_PMU_EVENT_ATTR(io_rd_cpipe, 0x04, true); +HISI_L3C_PMU_EVENT_ATTR(io_rd_hit_cpipe, 0x05, true); +HISI_L3C_PMU_EVENT_ATTR(io_wr_cpipe, 0x06, true); +HISI_L3C_PMU_EVENT_ATTR(io_wr_hit_cpipe, 0x07, true); +HISI_L3C_PMU_EVENT_ATTR(victim_num, 0x0c, true); +HISI_L3C_PMU_EVENT_ATTR(rd_spipe, 0x18, false); +HISI_L3C_PMU_EVENT_ATTR(rd_hit_spipe, 0x19, false); +HISI_L3C_PMU_EVENT_ATTR(wr_spipe, 0x1a, false); +HISI_L3C_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b, false); +HISI_L3C_PMU_EVENT_ATTR(io_rd_spipe, 0x1c, false); +HISI_L3C_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d, false); +HISI_L3C_PMU_EVENT_ATTR(io_wr_spipe, 0x1e, false); +HISI_L3C_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f, false); +HISI_L3C_PMU_EVENT_ATTR(cycles, 0x7f, false); +HISI_L3C_PMU_EVENT_ATTR(l3t_comp_sum, 0x80, false); +HISI_L3C_PMU_EVENT_ATTR(l3t_rdnotram, 0x83, true); +HISI_L3C_PMU_EVENT_ATTR(l3c_ref, 0xbc, false); +HISI_L3C_PMU_EVENT_ATTR(l3c2ring, 0xbd, true); + +static struct attribute *hisi_l3c_pmu_v3_events_attr[] = { + &hisi_l3c_rd_cpipe_attr.attr.attr, + &hisi_l3c_rd_hit_cpipe_attr.attr.attr, + &hisi_l3c_wr_cpipe_attr.attr.attr, + &hisi_l3c_wr_hit_cpipe_attr.attr.attr, + &hisi_l3c_io_rd_cpipe_attr.attr.attr, + &hisi_l3c_io_rd_hit_cpipe_attr.attr.attr, + 
&hisi_l3c_io_wr_cpipe_attr.attr.attr, + &hisi_l3c_io_wr_hit_cpipe_attr.attr.attr, + &hisi_l3c_victim_num_attr.attr.attr, + &hisi_l3c_rd_spipe_attr.attr.attr, + &hisi_l3c_rd_hit_spipe_attr.attr.attr, + &hisi_l3c_wr_spipe_attr.attr.attr, + &hisi_l3c_wr_hit_spipe_attr.attr.attr, + &hisi_l3c_io_rd_spipe_attr.attr.attr, + &hisi_l3c_io_rd_hit_spipe_attr.attr.attr, + &hisi_l3c_io_wr_spipe_attr.attr.attr, + &hisi_l3c_io_wr_hit_spipe_attr.attr.attr, + &hisi_l3c_cycles_attr.attr.attr, + &hisi_l3c_l3t_comp_sum_attr.attr.attr, + &hisi_l3c_l3t_rdnotram_attr.attr.attr, + &hisi_l3c_l3c_ref_attr.attr.attr, + &hisi_l3c_l3c2ring_attr.attr.attr, + NULL +}; + +static umode_t hisi_l3c_pmu_v3_events_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct hisi_pmu *l3c_pmu = to_hisi_pmu(pmu); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + struct hisi_l3c_pmu_v3_event *event; + struct dev_ext_attribute *ext_attr; + + ext_attr = container_of(attr, struct dev_ext_attribute, attr.attr); + event = ext_attr->var; + + if (!event->ext || (hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT)) + return attr->mode; + + return 0; +} + +static const struct attribute_group hisi_l3c_pmu_v3_events_group = { + .name = "events", + .is_visible = hisi_l3c_pmu_v3_events_visible, + .attrs = hisi_l3c_pmu_v3_events_attr, +}; + static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, @@ -500,6 +725,14 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { NULL }; +static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = { + &hisi_l3c_pmu_v3_format_group, + &hisi_l3c_pmu_v3_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { .attr_groups = hisi_l3c_pmu_v1_attr_groups, .counter_bits = 48, @@ -512,6 +745,13 @@ static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { .check_event = L3C_V2_NR_EVENTS, }; +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = { + .attr_groups = hisi_l3c_pmu_v3_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, + .private = (void *) L3C_PMU_FEAT_EXT, +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_l3c_pmu_get_event_idx, @@ -532,6 +772,7 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); int ret; ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu); @@ -550,27 +791,50 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; + if ((unsigned long)l3c_pmu->dev_info->private & L3C_PMU_FEAT_EXT) { + ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev); + if (ret) { + dev_warn(&pdev->dev, "ext event is unavailable, ret = %d\n", ret); + } else { + /* + * The extension events have their own counters with the + * same number of the normal events counters. So we can + * have at maximum num_counters * 2 events monitored. 
+ */ + l3c_pmu->num_counters <<= 1; + + hisi_l3c_pmu->feature |= L3C_PMU_FEAT_EXT; + } + } + return 0; } static int hisi_l3c_pmu_probe(struct platform_device *pdev) { + struct hisi_l3c_pmu *hisi_l3c_pmu; struct hisi_pmu *l3c_pmu; char *name; int ret; - l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL); - if (!l3c_pmu) + hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL); + if (!hisi_l3c_pmu) return -ENOMEM; + l3c_pmu = &hisi_l3c_pmu->l3c_pmu; platform_set_drvdata(pdev, l3c_pmu); ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu); if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", - l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); + if (l3c_pmu->topo.sub_id >= 0) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id, + l3c_pmu->topo.sub_id); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; @@ -613,6 +877,7 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); @@ -627,14 +892,55 @@ static struct platform_driver hisi_l3c_pmu_driver = { .remove = hisi_l3c_pmu_remove, }; +static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu; + int ret; + + hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + + /* + * Invoking the framework's online function for doing the core logic + * of CPU, interrupt and perf context migrating. Then return directly + * if we don't support L3C_PMU_FEAT_EXT. Otherwise migrate the ext_irq + * using the migrated CPU. + * + * Same logic for CPU offline. + */ + ret = hisi_uncore_pmu_online_cpu(cpu, node); + if (!(hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT) || + l3c_pmu->on_cpu >= nr_cpu_ids) + return ret; + + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq, cpumask_of(l3c_pmu->on_cpu))); + return ret; +} + +static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu; + int ret; + + hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + ret = hisi_uncore_pmu_offline_cpu(cpu, node); + if (!(hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT) || + l3c_pmu->on_cpu >= nr_cpu_ids) + return ret; + + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq, cpumask_of(l3c_pmu->on_cpu))); + return ret; +} + static int __init hisi_l3c_pmu_module_init(void) { int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, "AP_PERF_ARM_HISI_L3T_ONLINE", - hisi_uncore_pmu_online_cpu, - hisi_uncore_pmu_offline_cpu); + hisi_l3c_pmu_online_cpu, + hisi_l3c_pmu_offline_cpu); if (ret) { pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret); return ret; -- Gitee From 2732062c5bd489026a34c679669dc517997ed6e1 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 16 Jun 2025 18:28:31 +0800 Subject: [PATCH 164/173] drivers/perf: hisi: Clarifying event names and fix event ID for pa_pmu The rx_req and tx_req events were only counting the number of flit package requests for link0. 
HiSilicon PA supports 4 links (0-3), and the original event names could
be misinterpreted as counting the total flit packages across all links.
Additionally, the tx_req event ID was incorrect. Fix this by:

1) Adding the "link0" suffix and "pa" prefix to the event names to
   clearly indicate that they are specific to link0 and belong to the
   PA PMU.
2) Updating the tx_req event ID to 0x60 to reflect the correct
   identifier.

These changes ensure accurate event naming and counting for link0 and
flit packages.

Fixes: a0ab25cd82ee ("drivers/perf: hisi: Add support for HiSilicon PA PMU driver")
Signed-off-by: Junhao He
Signed-off-by: Yushan Wang
Signed-off-by: Qizhi Zhang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index b611fbae55cb..53f994c88927 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -316,8 +316,8 @@ static const struct attribute_group hisi_pa_pmu_v2_format_group = {
 };
 
 static struct attribute *hisi_pa_pmu_v2_events_attr[] = {
-	HISI_PMU_EVENT_ATTR(rx_req, 0x40),
-	HISI_PMU_EVENT_ATTR(tx_req, 0x5c),
+	HISI_PMU_EVENT_ATTR(pa_rx_req_link0, 0x40),
+	HISI_PMU_EVENT_ATTR(pa_tx_req_link0, 0x60),
 	HISI_PMU_EVENT_ATTR(cycle, 0x78),
 	NULL
 };
-- 
Gitee

From 0cf4dbc096f271859e563ba8459709d57e62b00b Mon Sep 17 00:00:00 2001
From: Yushan Wang
Date: Mon, 16 Jun 2025 18:47:47 +0800
Subject: [PATCH 165/173] perf: Remove unstable events for uncore L3C PMU

Currently, when counting events 0x80 and 0x83, the value of the PMU
counters will be unstable if the value overflows and causes an IRQ.
Remove these events from sysfs.

Fixes: 89711c8962cc ("drivers/perf: hisi: Add support for L3C PMU v3")
Signed-off-by: Yushan Wang
Signed-off-by: Qizhi Zhang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 20855322aeda..9df901aea9eb 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -653,8 +653,6 @@ HISI_L3C_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d, false);
 HISI_L3C_PMU_EVENT_ATTR(io_wr_spipe, 0x1e, false);
 HISI_L3C_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f, false);
 HISI_L3C_PMU_EVENT_ATTR(cycles, 0x7f, false);
-HISI_L3C_PMU_EVENT_ATTR(l3t_comp_sum, 0x80, false);
-HISI_L3C_PMU_EVENT_ATTR(l3t_rdnotram, 0x83, true);
 HISI_L3C_PMU_EVENT_ATTR(l3c_ref, 0xbc, false);
 HISI_L3C_PMU_EVENT_ATTR(l3c2ring, 0xbd, true);
 
@@ -677,8 +675,6 @@ static struct attribute *hisi_l3c_pmu_v3_events_attr[] = {
 	&hisi_l3c_io_wr_spipe_attr.attr.attr,
 	&hisi_l3c_io_wr_hit_spipe_attr.attr.attr,
 	&hisi_l3c_cycles_attr.attr.attr,
-	&hisi_l3c_l3t_comp_sum_attr.attr.attr,
-	&hisi_l3c_l3t_rdnotram_attr.attr.attr,
 	&hisi_l3c_l3c_ref_attr.attr.attr,
 	&hisi_l3c_l3c2ring_attr.attr.attr,
 	NULL
-- 
Gitee

From 7dceb64e65f583537dd7ec504094ba01e6b9e9f6 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Mon, 16 Jun 2025 18:47:51 +0800
Subject: [PATCH 166/173] drivers/perf: hisi: Add cacheable option for L3C PMU

L3C PMU v3 implements additional control for tracetag which may
influence the filtering of certain events.
Add the option below:
- cacheable: whether to filter cacheable or noncacheable operations

Fixes: 89711c8962cc ("drivers/perf: hisi: Add support for L3C PMU v3")
Signed-off-by: Yicong Yang
Signed-off-by: Yushan Wang
Signed-off-by: Qizhi Zhang
Signed-off-by: huwentao
---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 9df901aea9eb..1d76d47f2de1 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -46,6 +46,7 @@
 #define L3C_TRACETAG_MARK_EN	BIT(0)
 #define L3C_TRACETAG_REQ_EN	(L3C_TRACETAG_MARK_EN | BIT(2))
 #define L3C_TRACETAG_CORE_EN	(L3C_TRACETAG_MARK_EN | BIT(3))
+#define L3C_TRACETAG_CACHEABLE_EN	BIT(12)
 #define L3C_CORE_EN		BIT(20)
 #define L3C_COER_NONE		0x0
 #define L3C_DATSRC_MASK		0xFF
@@ -59,6 +60,7 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 16, 16);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_cacheable, config1, 17, 17);
 HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0);
 
 struct hisi_l3c_pmu {
@@ -157,6 +159,10 @@
 	val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
 	val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
 	val |= L3C_TRACETAG_REQ_EN;
+
+	if (hisi_get_tt_cacheable(event))
+		val |= L3C_TRACETAG_CACHEABLE_EN;
+
 	hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
 
 	/* Enable request-tracetag statistics */
@@ -178,6 +184,10 @@
 	val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL);
 	val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
 	val &= ~L3C_TRACETAG_REQ_EN;
+
+	if (hisi_get_tt_cacheable(event))
+		val &= ~L3C_TRACETAG_CACHEABLE_EN;
+
 	hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val);
 
 	/* Disable request-tracetag statistics */
@@ -566,6 +576,7 @@ static struct attribute *hisi_l3c_pmu_v3_format_attr[] = {
 	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
 	HISI_PMU_FORMAT_ATTR(ext, "config:16"),
 	HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+	HISI_PMU_FORMAT_ATTR(tt_cacheable, "config1:17"),
 	HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"),
 	NULL
 };
-- 
Gitee

From f08a7f14c220c0034dffdc02d8fd4a1adbb323e3 Mon Sep 17 00:00:00 2001
From: Junhao He
Date: Mon, 16 Jun 2025 21:25:59 +0800
Subject: [PATCH 167/173] drivers/perf: hisi: Fix the incorrect bitmask limit
 for the CPA event sysfs interface

The CPA PMU sysfs interface declares its event format as bitmask(0-15),
but the hardware event ID only occupies 8 bits. However, when users
input events in the range of 0x1ff to 0xffff, the driver does not
return an error. Therefore, the bitmask limit should be adjusted to
bitmask(0-7) to ensure proper functionality.
before the patch: [root@localhost ~]# perf stat -e hisi_sicl0_cpa0/event=0x1FF/ sleep 1 Performance counter stats for 'system wide': hisi_sicl0_cpa0/event=0x1FF/ after the patch: [root@localhost ~]# perf stat -e hisi_sicl0_cpa0/event=0x1FF/ sleep 1 event syntax error: '..cpa0/event=0x1FF/' \___ value too big for format, maximum is 255 Fixes: 6b79738b6ed9 ("drivers/perf: hisi: Add Support for CPA PMU") Signed-off-by: Junhao He Signed-off-by: Yushan Wang Signed-off-by: Qizhi Zhang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index cca85d2f746d..0189ad6fd7cb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -205,7 +205,7 @@ static int hisi_cpa_pmu_init_data(struct platform_device *pdev, } static struct attribute *hisi_cpa_pmu_format_attr[] = { - HISI_PMU_FORMAT_ATTR(event, "config:0-15"), + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL }; -- Gitee From dd32840030098f32d74b4873ffc577ed1bc4e02b Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 17 Jun 2025 17:17:31 +0800 Subject: [PATCH 168/173] drivers/perf: hisi: Add events and rename event "cycle" for pa_pmu 1) Add events to count the number of request flit packages for links 1-3 and data flit packages for all links (0-3). 2) rename event "cycle" to pa_cycles to clearly indicate that it belongs to the PA PMU Fixes: a0ab25cd82ee ("drivers/perf: hisi: Add support for HiSilicon PA PMU driver") Signed-off-by: Junhao He Signed-off-by: Yushan Wang Signed-off-by: Qizhi Zhang Signed-off-by: huwentao --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 53f994c88927..01c8c710365c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -317,8 +317,22 @@ static const struct attribute_group hisi_pa_pmu_v2_format_group = { static struct attribute *hisi_pa_pmu_v2_events_attr[] = { HISI_PMU_EVENT_ATTR(pa_rx_req_link0, 0x40), + HISI_PMU_EVENT_ATTR(pa_rx_req_link1, 0x41), + HISI_PMU_EVENT_ATTR(pa_rx_req_link2, 0x42), + HISI_PMU_EVENT_ATTR(pa_rx_req_link3, 0x43), + HISI_PMU_EVENT_ATTR(pa_rx_data_link0, 0x44), + HISI_PMU_EVENT_ATTR(pa_rx_data_link1, 0x45), + HISI_PMU_EVENT_ATTR(pa_rx_data_link2, 0x46), + HISI_PMU_EVENT_ATTR(pa_rx_data_link3, 0x47), HISI_PMU_EVENT_ATTR(pa_tx_req_link0, 0x60), - HISI_PMU_EVENT_ATTR(cycle, 0x78), + HISI_PMU_EVENT_ATTR(pa_tx_req_link1, 0x61), + HISI_PMU_EVENT_ATTR(pa_tx_req_link2, 0x62), + HISI_PMU_EVENT_ATTR(pa_tx_req_link3, 0x63), + HISI_PMU_EVENT_ATTR(pa_tx_data_link0, 0x64), + HISI_PMU_EVENT_ATTR(pa_tx_data_link1, 0x65), + HISI_PMU_EVENT_ATTR(pa_tx_data_link2, 0x66), + HISI_PMU_EVENT_ATTR(pa_tx_data_link3, 0x67), + HISI_PMU_EVENT_ATTR(pa_cycles, 0x78), NULL }; -- Gitee From 4c0aa4598998ce2f8ed4750729621b3c2060312a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 25 Apr 2025 11:38:43 +0800 Subject: [PATCH 169/173] arm64: cputype: Add cputype definition for HIP12 [Upstream commit 1ae35d2924477ec178f4f9d10b7da91aa2ea8884] Add MIDR encoding for HiSilicon HIP12 which is used on HiSilicon HIP12 SoCs. 
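Kernel code can then match the part in the usual MIDR way, e.g. (a sketch;
the perf tool patch below uses the same pattern through its copy of
cputype.h):

	static const struct midr_range hip12_cpus[] = {
		MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
		{},
	};

	static bool is_hip12(void)
	{
		return is_midr_in_range_list(read_cpuid_id(), hip12_cpus);
	}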
Signed-off-by: Yicong Yang
Link: https://lore.kernel.org/r/20250425033845.57671-2-yangyicong@huawei.com
Signed-off-by: Will Deacon
Signed-off-by: huwentao
---
 arch/arm64/include/asm/cputype.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 414aa8a92896..4910e66bc3a0 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -112,6 +112,7 @@
 #define HISI_CPU_PART_TSV110 0xD01
 #define HISI_CPU_PART_TSV200 0xD02
 #define HISI_CPU_PART_LINXICORE9100 0xD02
+#define HISI_CPU_PART_HIP12 0xD06

 #define AMPERE_CPU_PART_AMPERE1 0xAC3

@@ -152,7 +153,7 @@
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
 #define MIDR_HISI_TSV200 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV200)
 #define MIDR_HISI_LINXICORE9100 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_LINXICORE9100)
-
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
 #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)

 #define MIDR_PHYTIUM_FTC862 MIDR_CPU_MODEL(ARM_CPU_IMP_PHYTIUM, PHYTIUM_CPU_PART_FTC862)
-- Gitee

From 921150ab2465259b3b348ff450dbf47e8cf6b85f Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 25 Apr 2025 11:38:44 +0800
Subject: [PATCH 170/173] perf arm-spe: Add support for SPE Data Source packet
 on HiSilicon HIP12

[Upstream commit 846b62b3433d2e87018576ab386f7a96390f66e4]

Add data source encoding for HiSilicon HIP12 and the corresponding
mapping to perf's memory data source. This will help to synthesize the
data and support upper layer tools like perf-mem and perf-c2c.

Reviewed-by: Leo Yan
Signed-off-by: Yicong Yang
Cc: CaiJingtao
Cc: Catalin Marinas
Cc: Ian Rogers
Cc: Ingo Molnar
Cc: James Clark
Cc: Jiri Olsa
Cc: John Garry
Cc: Jonathan Cameron
Cc: Junhao He
Cc: Leo Yan
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Will Deacon
Cc: Yushan Wang
Cc: Zeng Tao
Cc: xueshan2@huawei.com
Link: https://lore.kernel.org/r/20250425033845.57671-3-yangyicong@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/arch/arm64/include/asm/cputype.h     |  2 +
 .../util/arm-spe-decoder/arm-spe-decoder.h | 17 ++++
 tools/perf/util/arm-spe.c                  | 98 +++++++++++++++++++
 3 files changed, 117 insertions(+)

diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 7e738fc6e3db..fc4e8cc973c1 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -114,6 +114,7 @@
 #define FUJITSU_CPU_PART_A64FX 0x001

 #define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP12 0xD06

 #define APPLE_CPU_PART_M1_ICESTORM 0x022
 #define APPLE_CPU_PART_M1_FIRESTORM 0x023
@@ -160,6 +161,7 @@
 #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
 #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
 #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
 #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
 #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index fa269c9c53b3..baa5e6a564bd 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,23 @@ enum arm_spe_neoverse_data_source {
     ARM_SPE_NV_DRAM = 0xe,
 };

+enum arm_spe_hisi_hip_data_source {
+    ARM_SPE_HISI_HIP_PEER_CPU = 0,
+    ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
+    ARM_SPE_HISI_HIP_L3 = 2,
+    ARM_SPE_HISI_HIP_L3_HITM = 3,
+    ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
+    ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
+    ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
+    ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
+    ARM_SPE_HISI_HIP_LOCAL_MEM = 8,
+    ARM_SPE_HISI_HIP_REMOTE_MEM = 9,
+    ARM_SPE_HISI_HIP_NC_DEV = 13,
+    ARM_SPE_HISI_HIP_L2 = 16,
+    ARM_SPE_HISI_HIP_L2_HITM = 17,
+    ARM_SPE_HISI_HIP_L1 = 18,
+};
+
 struct arm_spe_record {
     enum arm_spe_sample_type type;
     int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 4073d656dfeb..9499a683bb0c 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -372,6 +372,11 @@ static const struct midr_range neoverse_spe[] = {
     {},
 };

+static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
+    MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
+    {},
+};
+
 static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
                                                 union perf_mem_data_src *data_src)
 {
@@ -471,10 +476,101 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
         data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 }

+static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
+                                                union perf_mem_data_src *data_src)
+{
+    /* Use the common synthesis method to handle store operations */
+    if (record->op & ARM_SPE_OP_ST) {
+        arm_spe__synth_data_source_generic(record, data_src);
+        return;
+    }
+
+    switch (record->source) {
+    case ARM_SPE_HISI_HIP_PEER_CPU:
+        data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+        break;
+    case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
+        data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+        break;
+    case ARM_SPE_HISI_HIP_L3:
+        data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+        data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+        break;
+    case ARM_SPE_HISI_HIP_L3_HITM:
+        data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+        break;
+    case ARM_SPE_HISI_HIP_PEER_CLUSTER:
+        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+        break;
+    case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
+        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+        break;
+    case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
+        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+        data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+        break;
+    case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
+        data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+        data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+        data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+        break;
+    case ARM_SPE_HISI_HIP_LOCAL_MEM:
+        data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+        break;
+    case ARM_SPE_HISI_HIP_REMOTE_MEM:
+        data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+        data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+        break;
+    case ARM_SPE_HISI_HIP_NC_DEV:
+        data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+        break;
+    case ARM_SPE_HISI_HIP_L2:
+        data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+        break;
+    case ARM_SPE_HISI_HIP_L2_HITM:
+        data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+        data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+        break;
+    case ARM_SPE_HISI_HIP_L1:
+        data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+        data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+        data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+        break;
+    default:
+        break;
+    }
+}
+
 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
 {
     union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
     bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+    bool is_hisilicon = is_midr_in_range_list(midr, hisi_hip_ds_encoding_cpus);

     if (record->op & ARM_SPE_OP_LD)
         data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -485,6 +581,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m

     if (is_neoverse)
         arm_spe__synth_data_source_neoverse(record, &data_src);
+    else if (is_hisilicon)
+        arm_spe__synth_data_source_hisi_hip(record, &data_src);
     else
         arm_spe__synth_data_source_generic(record, &data_src);
-- Gitee

From ffc0f04106476cf16e979c8989f50369494c687f Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Fri, 25 Apr 2025 11:38:45 +0800
Subject: [PATCH 171/173] perf mem: Count L2 HITM for c2c statistic

[Upstream commit fa9b3578ed628641504ab74958bbe36a42c2615a]

L2 HITM is not counted in the c2c statistics decoding. Count it as
lcl_hitm, in the same way we already handle the L2 peer snoop.
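The rule can be sketched as standalone C; the flag values and the
account_l2_load() helper below are stand-ins for illustration, not the
real PERF_MEM_* encodings:

  #include <stdio.h>

  /* Stand-in flags; the real encodings live in uapi/linux/perf_event.h. */
  #define LVL_L2     0x01
  #define SNOOP_HITM 0x02

  struct c2c_stats {
          unsigned int ld_l2hit; /* plain L2 hits */
          unsigned int lcl_hitm; /* local HITM */
  };

  static void account_l2_load(struct c2c_stats *stats,
                              unsigned int lvl, unsigned int snoop)
  {
          if (!(lvl & LVL_L2))
                  return;

          /* An L2 hit served via a HITM snoop counts as a local HITM,
           * mirroring the existing L3 handling. */
          if (snoop & SNOOP_HITM)
                  stats->lcl_hitm++;
          else
                  stats->ld_l2hit++;
  }

  int main(void)
  {
          struct c2c_stats stats = { 0, 0 };

          account_l2_load(&stats, LVL_L2, SNOOP_HITM);
          account_l2_load(&stats, LVL_L2, 0);
          printf("ld_l2hit=%u lcl_hitm=%u\n", stats.ld_l2hit, stats.lcl_hitm);
          return 0; /* prints ld_l2hit=1 lcl_hitm=1 */
  }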
Reviewed-by: Leo Yan
Signed-off-by: Yicong Yang
Cc: CaiJingtao
Cc: Catalin Marinas
Cc: Ian Rogers
Cc: Ingo Molnar
Cc: James Clark
Cc: Jiri Olsa
Cc: John Garry
Cc: Jonathan Cameron
Cc: Junhao He
Cc: Leo Yan
Cc: Mark Rutland
Cc: Namhyung Kim
Cc: Peter Zijlstra
Cc: Will Deacon
Cc: Yushan Wang
Cc: Zeng Tao
Cc: xueshan2@huawei.com
Link: https://lore.kernel.org/r/20250425033845.57671-4-yangyicong@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: huwentao
---
 tools/perf/util/mem-events.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bf9084b325ac..64f7e0501bc9 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -388,7 +388,12 @@ do { \
     if (lvl & P(LVL, IO))  stats->ld_io++;
     if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
     if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
-    if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+    if (lvl & P(LVL, L2)) {
+        if (snoop & P(SNOOP, HITM))
+            HITM_INC(lcl_hitm);
+        else
+            stats->ld_l2hit++;
+    }
     if (lvl & P(LVL, L3 )) {
         if (snoop & P(SNOOP, HITM))
             HITM_INC(lcl_hitm);
-- Gitee

From a3f1d14fdb415b8fe58cb3695bc8557197c81678 Mon Sep 17 00:00:00 2001
From: Yicong Yang
Date: Tue, 26 Aug 2025 14:48:20 +0800
Subject: [PATCH 172/173] arm64: perf: Add support for HIP12 hw metric

HIP12 provides hardware metric sampling using adjacent counters
counter_2n and counter_2n+1. Overflow of counter_2n+1 results in an
interrupt, while overflow of counter_2n loads the initial values, which
are stored in the dedicated registers reload_counter_2n and
reload_counter_2n+1, into both counters.

With the ability above, software can perform sampling only while
handling the interrupt of counter_2n+1, and by configuring different
values in reload_counter_2n and reload_counter_2n+1 it realizes
hardware metric sampling. For example:

perf record -e '\
{armv8_pmuv3_0/cpu_cycles,period=1000000,hw_metric=1/, \
armv8_pmuv3_0/inst_retired,period=800000,hw_metric=1/}:u' \
--

The command above will only take samples when IPC < 800000 / 1000000 =
0.8, since the interrupt only appears when cpu_cycles reaches 1000000
while inst_retired is still below 800000.
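As a sanity check of that arithmetic, a small standalone model follows;
the counter roles match the description above, and the assumed IPC
value is purely illustrative:

  #include <stdio.h>

  int main(void)
  {
          const double insts_period = 800000, cycles_period = 1000000;
          const double ipc = 0.7; /* assumed workload IPC */

          /* Cycles elapsed before inst_retired (counter_2n) reaches its
           * period and silently reloads both counters. */
          double cycles_to_reload = insts_period / ipc;

          if (cycles_to_reload < cycles_period)
                  printf("counter_2n reloads first: no sample (IPC >= 0.8)\n");
          else
                  printf("counter_2n+1 overflows first: interrupt, sample taken\n");
          return 0;
  }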
Signed-off-by: Yicong Yang
Signed-off-by: Yushan Wang
Signed-off-by: Qizhi Zhang
Signed-off-by: huwentao
---
 arch/arm64/configs/tencent.config |   1 +
 arch/arm64/kernel/perf_event.c    | 194 ++++++++++++++++++++++++++++++
 drivers/perf/Kconfig              |   7 ++
 drivers/perf/arm_pmu.c            |   9 ++
 4 files changed, 211 insertions(+)

diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config
index 63cebd9a7a46..9935fd573f7d 100644
--- a/arch/arm64/configs/tencent.config
+++ b/arch/arm64/configs/tencent.config
@@ -1507,3 +1507,4 @@ CONFIG_ARM64_HAFT=y
 CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
 CONFIG_HISI_L3T_PMU=m
 CONFIG_HISI_LPDDRC_PMU=m
+CONFIG_HISILICON_HW_METRIC=y
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 33598c861f90..83b47b055709 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -332,23 +332,73 @@ static struct attribute_group armv8_pmuv3_events_attr_group = {
 #define ATTR_CFG_FLD_long_LO 0
 #define ATTR_CFG_FLD_long_HI 0

+#ifdef CONFIG_HISILICON_HW_METRIC
+#define ATTR_CFG_FLD_hw_metric_CFG config2
+#define ATTR_CFG_FLD_hw_metric_LO 0
+#define ATTR_CFG_FLD_hw_metric_HI 0
+#endif
+
 GEN_PMU_FORMAT_ATTR(event);
 GEN_PMU_FORMAT_ATTR(long);
+#ifdef CONFIG_HISILICON_HW_METRIC
+GEN_PMU_FORMAT_ATTR(hw_metric);
+#endif

 static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
 {
     return ATTR_CFG_GET_FLD(&event->attr, long);
 }

+#ifdef CONFIG_HISILICON_HW_METRIC
+static inline bool armv8pmu_event_is_hw_metric(struct perf_event *event)
+{
+    return ATTR_CFG_GET_FLD(&event->attr, hw_metric);
+}
+
+static bool armpmu_support_hisi_hw_metric(void)
+{
+    static const struct midr_range hip12_cpus[] = {
+        MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
+        { }
+    };
+
+    /*
+     * The hw_metric feature requires access to EL1 registers, which a
+     * virtual machine lacks the privilege to touch and where any such
+     * access would cause a kernel panic. Thus, this feature is
+     * disabled for virtual machines.
+ */ + return is_midr_in_range_list(read_cpuid_id(), hip12_cpus) && + is_kernel_in_hyp_mode(); +} +#endif + static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, &format_attr_long.attr, +#ifdef CONFIG_HISILICON_HW_METRIC + &format_attr_hw_metric.attr, +#endif NULL, }; +#ifdef CONFIG_HISILICON_HW_METRIC +static umode_t +armv8pmu_format_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + if (attr == &format_attr_hw_metric.attr && !armpmu_support_hisi_hw_metric()) + return 0; + + return attr->mode; +} +#endif + static struct attribute_group armv8_pmuv3_format_attr_group = { .name = "format", +#ifdef CONFIG_HISILICON_HW_METRIC + .is_visible = armv8pmu_format_attr_is_visible, +#endif .attrs = armv8_pmuv3_format_attrs, }; @@ -566,6 +616,41 @@ static inline void armv8pmu_write_evcntr(int idx, u64 value) write_pmevcntrn(counter, value); } +#ifdef CONFIG_HISILICON_HW_METRIC +static inline void armv8pmu_write_reload_counter(struct perf_event *event, + u64 value) +{ + /* Need to be event->hw.idx - 1 since counter 0 is PMCCNTR_EL0 */ + int idx = event->hw.idx - 1; + +#define HW_METRIC_RELOAD_CNTR(n) sys_reg(3, 3, 15, 3, (2 + n)) +#define write_hw_metric_reload_cntr(_value, _n) \ + do { \ + switch (_n) { \ + case 0: \ + write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(0)); break; \ + case 1: \ + write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(1)); break; \ + case 2: \ + write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(2)); break; \ + case 3: \ + write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(3)); break; \ + case 4: \ + write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(4)); break; \ + case 5: \ + write_sysreg_s(_value, HW_METRIC_RELOAD_CNTR(5)); break; \ + default: \ + WARN(1, "Invalid hw_metric reload counter index\n"); \ + dev_err(event->pmu->dev, "event is 0x%lx index is %x\n",\ + event->hw.config_base, event->hw.idx); \ + } \ + } while (0) + write_hw_metric_reload_cntr(value, idx); +#undef write_hw_metric_reload_cntr +#undef HW_METRIC_RELOAD_CNTR +} +#endif + static inline void armv8pmu_write_hw_counter(struct perf_event *event, u64 value) { @@ -577,6 +662,10 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, } else { armv8pmu_write_evcntr(idx, value); } +#ifdef CONFIG_HISILICON_HW_METRIC + if (armv8pmu_event_is_hw_metric(event)) + armv8pmu_write_reload_counter(event, value); +#endif } static void armv8pmu_write_counter(struct perf_event *event, u64 value) @@ -637,6 +726,38 @@ static inline int armv8pmu_enable_counter(int idx) return idx; } +#ifdef CONFIG_HISILICON_HW_METRIC +static inline void armv8pmu_enable_hw_metric(struct perf_event *event, bool enable) +{ + int idx = event->hw.idx; + u64 reg; + + /* + * Configure the chicken bit on leader event enabling. + */ + if (event != event->group_leader) + return; + + /* Convert the idx since we only use general counters, counter 0 is + * used for PMCCNTR_EL0. 
+ */ + idx -= 1; + +#define HISI_DTU_CTLR_EL1 sys_reg(3, 0, 15, 8, 4) +#define HISI_DTU_CTLR_EL1_CHK_GROUP0 BIT(15) + + reg = read_sysreg_s(HISI_DTU_CTLR_EL1); + if (enable) + reg |= HISI_DTU_CTLR_EL1_CHK_GROUP0 << (idx >> 1); + else + reg &= ~(HISI_DTU_CTLR_EL1_CHK_GROUP0 << (idx >> 1)); + + write_sysreg_s(reg, HISI_DTU_CTLR_EL1); + + reg = read_sysreg_s(HISI_DTU_CTLR_EL1); +} +#endif + static inline void armv8pmu_enable_event_counter(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; @@ -653,6 +774,11 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event) armv8pmu_enable_counter(idx); if (armv8pmu_event_is_chained(event)) armv8pmu_enable_counter(idx - 1); + +#ifdef CONFIG_HISILICON_HW_METRIC + if (armv8pmu_event_is_hw_metric(event)) + armv8pmu_enable_hw_metric(event, false); +#endif } } @@ -896,6 +1022,61 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +#ifdef CONFIG_HISILICON_HW_METRIC +static int armv8pmu_check_hw_metric_event(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + int hw_metric_cnt = 0; + + if (cpuc->percpu_pmu) { + for_each_sibling_event(sibling, leader) { + if (armv8pmu_event_is_hw_metric(sibling)) + hw_metric_cnt++; + } + + if (hw_metric_cnt != 1) + return -EINVAL; + } else { + if (event == leader) + return 0; + + if (!armv8pmu_event_is_hw_metric(leader)) + return -EINVAL; + + for_each_sibling_event(sibling, leader) { + if (armv8pmu_event_is_hw_metric(sibling)) + hw_metric_cnt++; + } + + if (hw_metric_cnt > 0) + return -EINVAL; + } + + return 0; +} + +static int armv8pmu_get_hw_metric_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct perf_event *leader = event->group_leader; + int leader_idx; + + if (armv8pmu_check_hw_metric_event(cpuc, event)) + return -EINVAL; + + if (event == leader || leader->hw.idx < 1) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + + leader_idx = leader->hw.idx; + if (cpuc->events[leader_idx - 1]) + return -EAGAIN; + + return leader_idx - 1; +} +#endif + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { @@ -903,6 +1084,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; +#ifdef CONFIG_HISILICON_HW_METRIC + if (armv8pmu_event_is_hw_metric(event)) + return armv8pmu_get_hw_metric_event_idx(cpuc, event); + else if (event != event->group_leader && + armv8pmu_event_is_hw_metric(event->group_leader)) + return -EINVAL; +#endif + /* Always prefer to place a cycle counter into the cycle counter. 
     */
    if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
        if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
@@ -1055,6 +1244,11 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
     if (armv8pmu_event_is_64bit(event))
         event->hw.flags |= ARMPMU_EVT_64BIT;

+#ifdef CONFIG_HISILICON_HW_METRIC
+    if (armv8pmu_event_is_hw_metric(event) && !armpmu_support_hisi_hw_metric())
+        return -EOPNOTSUPP;
+#endif
+
     /* Only expose micro/arch events supported by this PMU */
     if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
         && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 6d2f2cc86379..9de9e70a779a 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -152,4 +152,11 @@ config ARM64_BRBE

 source "drivers/perf/hisilicon/Kconfig"

+config HISILICON_HW_METRIC
+    bool "HiSilicon hardware metric sampling support"
+    depends on ARM64
+    help
+      Support hardware metric sampling, which allows samples to be
+      filtered based on the ratio between the sampling periods of two
+      events in a group.
+
 endmenu
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 1d9ef5065db3..d856b096cf94 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -417,6 +417,15 @@ validate_group(struct perf_event *event)
     */
    memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
+
+#ifdef CONFIG_HISILICON_HW_METRIC
+    /*
+     * Clear percpu_pmu so that get_event_idx can tell it is being
+     * called for group validation rather than for a live PMU.
+     */
+    fake_pmu.percpu_pmu = NULL;
+#endif
+
    if (!validate_event(event->pmu, &fake_pmu, leader))
        return -EINVAL;
-- Gitee

From 8584cd90419a9873252b51ab44b8c49508c984fd Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Tue, 21 Jul 2020 18:49:33 +0800
Subject: [PATCH 173/173] arm64: perf: Expose some new events via sysfs

[Upstream commit 55fdc1f44cd6bb1d61c9ca946d8f7cd67ea0bf36]

Some new PMU events can be detected by PMCEID1_EL0, but they can't be
listed yet. Let's expose these through sysfs.
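A rough model of the gating this relies on is sketched below; the
helper name and the bitmap layout are assumptions that mirror the
PMCEID-based is_visible filtering, not the kernel code itself:

  #include <stdbool.h>
  #include <stdio.h>

  /* Event IDs below 0x40 come from PMCEID0/1_EL0 directly; IDs at
   * 0x4000 and above are the extended common events, assumed here to
   * be folded into the upper half of a single bitmap. */
  #define MAX_COMMON_EVENTS 0x40
  #define EXT_EVENT_BASE    0x4000

  static bool event_visible(const unsigned long *pmceid, unsigned int id)
  {
          if (id >= EXT_EVENT_BASE)
                  id = id - EXT_EVENT_BASE + MAX_COMMON_EVENTS;
          if (id >= 2 * MAX_COMMON_EVENTS)
                  return false;
          return pmceid[id / (8 * sizeof(long))] >>
                 (id % (8 * sizeof(long))) & 1;
  }

  int main(void)
  {
          unsigned long pmceid[2 * MAX_COMMON_EVENTS / (8 * sizeof(long))] = { 0 };

          /* Pretend the hardware advertises only OP_RETIRED (0x3A). */
          pmceid[0x3A / (8 * sizeof(long))] |= 1UL << (0x3A % (8 * sizeof(long)));
          printf("op_retired visible: %d\n", event_visible(pmceid, 0x3A));
          printf("stall visible: %d\n", event_visible(pmceid, 0x3C));
          return 0;
  }

With this scheme the attributes can be registered unconditionally while
unsupported events simply stay hidden in sysfs.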
Signed-off-by: Shaokun Zhang Cc: Will Deacon Cc: Mark Rutland Link: https://lore.kernel.org/r/1595328573-12751-2-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon Signed-off-by: huwentao --- arch/arm64/include/asm/perf_event.h | 27 ++++++++++++++++++++ arch/arm64/kernel/perf_event.c | 38 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index c121eaf67d37..d1448affebec 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -72,6 +72,13 @@ #define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 #define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B +#define ARMV8_PMUV3_PERFCTR_STALL 0x3C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F /* Statistical profiling extension microarchitectural events */ #define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 @@ -79,6 +86,26 @@ #define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 #define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 +/* AMUv1 architecture events */ +#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 +#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 + +/* long-latency read miss events */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B + +/* additional latency from alignment events */ +#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 +#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 +#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 + +/* Armv8.5 Memory Tagging Extension events */ +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 + /* ARMv8 recommended implementation defined event types */ #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 83b47b055709..50c0ebd033e4 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -223,10 +223,29 @@ ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK); ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD); ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD); ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD); +ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD); +ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED); +ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC); +ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL); +ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND); +ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND); +ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT); ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP); ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED); ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE); ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION); +ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES); 
+ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM); +ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS); +ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD); +ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS); +ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD); +ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT); +ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT); +ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT); +ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED); +ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD); +ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR); static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_sw_incr.attr.attr, @@ -285,10 +304,29 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { &armv8_event_attr_ll_cache_rd.attr.attr, &armv8_event_attr_ll_cache_miss_rd.attr.attr, &armv8_event_attr_remote_access_rd.attr.attr, + &armv8_event_attr_l1d_cache_lmiss_rd.attr.attr, + &armv8_event_attr_op_retired.attr.attr, + &armv8_event_attr_op_spec.attr.attr, + &armv8_event_attr_stall.attr.attr, + &armv8_event_attr_stall_slot_backend.attr.attr, + &armv8_event_attr_stall_slot_frontend.attr.attr, + &armv8_event_attr_stall_slot.attr.attr, &armv8_event_attr_sample_pop.attr.attr, &armv8_event_attr_sample_feed.attr.attr, &armv8_event_attr_sample_filtrate.attr.attr, &armv8_event_attr_sample_collision.attr.attr, + &armv8_event_attr_cnt_cycles.attr.attr, + &armv8_event_attr_stall_backend_mem.attr.attr, + &armv8_event_attr_l1i_cache_lmiss.attr.attr, + &armv8_event_attr_l2d_cache_lmiss_rd.attr.attr, + &armv8_event_attr_l2i_cache_lmiss.attr.attr, + &armv8_event_attr_l3d_cache_lmiss_rd.attr.attr, + &armv8_event_attr_ldst_align_lat.attr.attr, + &armv8_event_attr_ld_align_lat.attr.attr, + &armv8_event_attr_st_align_lat.attr.attr, + &armv8_event_attr_mem_access_checked.attr.attr, + &armv8_event_attr_mem_access_checked_rd.attr.attr, + &armv8_event_attr_mem_access_checked_wr.attr.attr, NULL, }; -- Gitee