From d70c25b9b8ad0f15e0982ae542ed32fd216eb931 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:00 -0500 Subject: [PATCH 01/35] x86,fs/resctrl: Consolidate monitor event descriptions ANBZ: #20932 commit 09f37134464cc03baf5cb8eab2d99db27ee73217 upstream. There are currently only three monitor events, all associated with the RDT_RESOURCE_L3 resource. Growing support for additional events will be easier with some restructuring to have a single point in file system code where all attributes of all events are defined. Place all event descriptions into an array mon_event_all[]. Doing this has the beneficial side effect of removing the need for rdt_resource::evt_list. Add resctrl_event_id::QOS_FIRST_EVENT for a lower bound on range checks for event ids and as the starting index to scan mon_event_all[]. Drop the code that builds evt_list and change the two places where the list is scanned to scan mon_event_all[] instead using a new helper macro for_each_mon_event(). Architecture code now informs file system code which events are available with resctrl_enable_mon_event(). Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/core.c | 12 ++++-- fs/resctrl/internal.h | 13 ++++-- fs/resctrl/monitor.c | 63 +++++++++++++++--------------- fs/resctrl/rdtgroup.c | 11 +++--- include/linux/resctrl.h | 4 +- include/linux/resctrl_types.h | 12 ++++-- 6 files changed, 66 insertions(+), 49 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 794eddd4e649..528fc1bd86d3 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -864,12 +864,18 @@ static __init bool get_rdt_mon_resources(void) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) + if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { + resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID); rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) + } + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID); - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) + } + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID); + } if (!rdt_mon_features) return false; diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9a8cf6f11151..7a57366d1abc 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -52,19 +52,26 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) } /** - * struct mon_evt - Entry in the event list of a resource + * struct mon_evt - Properties of a monitor event * @evtid: event id + * @rid: resource id for this event * @name: name of the event * @configurable: true if the event is configurable - * @list: entry in &rdt_resource->evt_list + * @enabled: true if the event is enabled */ struct mon_evt { enum resctrl_event_id evtid; + enum resctrl_res_level rid; char *name; bool configurable; - struct list_head list; + bool enabled; }; +extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; + +#define for_each_mon_event(mevt) for (mevt = &mon_event_all[QOS_FIRST_EVENT]; \ + mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) + /** * struct mon_data - Monitoring details for each event file. * @list: Member of the global @mon_data_kn_priv_list list. diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 7326c28a7908..d5bf3e0334b6 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -842,38 +842,39 @@ static void dom_data_exit(struct rdt_resource *r) mutex_unlock(&rdtgroup_mutex); } -static struct mon_evt llc_occupancy_event = { - .name = "llc_occupancy", - .evtid = QOS_L3_OCCUP_EVENT_ID, -}; - -static struct mon_evt mbm_total_event = { - .name = "mbm_total_bytes", - .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, -}; - -static struct mon_evt mbm_local_event = { - .name = "mbm_local_bytes", - .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, -}; - /* - * Initialize the event list for the resource. - * - * Note that MBM events are also part of RDT_RESOURCE_L3 resource - * because as per the SDM the total and local memory bandwidth - * are enumerated as part of L3 monitoring. + * All available events. Architecture code marks the ones that + * are supported by a system using resctrl_enable_mon_event() + * to set .enabled. */ -static void l3_mon_evt_init(struct rdt_resource *r) +struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { + [QOS_L3_OCCUP_EVENT_ID] = { + .name = "llc_occupancy", + .evtid = QOS_L3_OCCUP_EVENT_ID, + .rid = RDT_RESOURCE_L3, + }, + [QOS_L3_MBM_TOTAL_EVENT_ID] = { + .name = "mbm_total_bytes", + .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, + .rid = RDT_RESOURCE_L3, + }, + [QOS_L3_MBM_LOCAL_EVENT_ID] = { + .name = "mbm_local_bytes", + .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, + .rid = RDT_RESOURCE_L3, + }, +}; + +void resctrl_enable_mon_event(enum resctrl_event_id eventid) { - INIT_LIST_HEAD(&r->evt_list); + if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS)) + return; + if (mon_event_all[eventid].enabled) { + pr_warn("Duplicate enable for event %d\n", eventid); + return; + } - if (resctrl_arch_is_llc_occupancy_enabled()) - list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_total_enabled()) - list_add_tail(&mbm_total_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_local_enabled()) - list_add_tail(&mbm_local_event.list, &r->evt_list); + mon_event_all[eventid].enabled = true; } /** @@ -900,15 +901,13 @@ int resctrl_mon_resource_init(void) if (ret) return ret; - l3_mon_evt_init(r); - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { - mbm_total_event.configurable = true; + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_total_bytes_config", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { - mbm_local_event.configurable = true; + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_local_bytes_config", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 31377854de15..6a3cec92667f 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1152,7 +1152,9 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, struct rdt_resource *r = rdt_kn_parent_priv(of->kn); struct mon_evt *mevt; - list_for_each_entry(mevt, &r->evt_list, list) { + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled) + continue; seq_printf(seq, "%s\n", mevt->name); if (mevt->configurable) seq_printf(seq, "%s_config\n", mevt->name); @@ -3054,10 +3056,9 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, struct mon_evt *mevt; int ret, domid; - if (WARN_ON(list_empty(&r->evt_list))) - return -EPERM; - - list_for_each_entry(mevt, &r->evt_list, list) { + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled) + continue; domid = do_sum ? d->ci_id : d->hdr.id; priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); if (WARN_ON_ONCE(!priv)) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 6fb4894b8cfd..2944042bd84c 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -269,7 +269,6 @@ enum resctrl_schema_fmt { * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. - * @evt_list: List of monitoring events * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource @@ -287,7 +286,6 @@ struct rdt_resource { struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - struct list_head evt_list; unsigned int mbm_cfg_mask; bool cdp_capable; }; @@ -372,6 +370,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +void resctrl_enable_mon_event(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index a25fb9c4070d..2dadbc54e4b3 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,11 +34,15 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -/* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. - */ +/* Event IDs */ enum resctrl_event_id { + /* Must match value of first event below */ + QOS_FIRST_EVENT = 0x01, + + /* + * These values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, -- Gitee From b375fcf5bbd69a2ca14788f0bbf46786a57acd5a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:01 -0500 Subject: [PATCH 02/35] x86,fs/resctrl: Replace architecture event enabled checks ANBZ: #20932 commit d257cc2e5c8bb8236cb161360d6c0529fd442712 upstream. The resctrl file system now has complete knowledge of the status of every event. So there is no need for per-event function calls to check. Replace each of the resctrl_arch_is_{event}enabled() calls with resctrl_is_mon_event_enabled(QOS_{EVENT}). No functional change. Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Fenghua Yu Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/include/asm/resctrl.h | 15 --------------- arch/x86/kernel/cpu/resctrl/core.c | 4 ++-- arch/x86/kernel/cpu/resctrl/monitor.c | 4 ++-- fs/resctrl/ctrlmondata.c | 4 ++-- fs/resctrl/monitor.c | 16 +++++++++++----- fs/resctrl/rdtgroup.c | 18 +++++++++--------- include/linux/resctrl.h | 2 ++ 7 files changed, 28 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index ad497ab196d1..9c889f51b7f1 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -82,21 +82,6 @@ static inline void resctrl_arch_disable_mon(void) static_branch_dec_cpuslocked(&rdt_enable_key); } -static inline bool resctrl_arch_is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool resctrl_arch_is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool resctrl_arch_is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - /* * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 528fc1bd86d3..e655da6d4038 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -401,13 +401,13 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { size_t tsize; - if (resctrl_arch_is_mbm_total_enabled()) { + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) { tsize = sizeof(*hw_dom->arch_mbm_total); hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_total) return -ENOMEM; } - if (resctrl_arch_is_mbm_local_enabled()) { + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) { tsize = sizeof(*hw_dom->arch_mbm_local); hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); if (!hw_dom->arch_mbm_local) { diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 60de452c9511..0eba7c467407 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -206,11 +206,11 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - if (resctrl_arch_is_mbm_total_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) memset(hw_dom->arch_mbm_total, 0, sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - if (resctrl_arch_is_mbm_local_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) memset(hw_dom->arch_mbm_local, 0, sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); } diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 3c39cfacb251..42b281b3852f 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -473,12 +473,12 @@ ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of, rdt_last_cmd_clear(); if (!strcmp(buf, "mbm_local_bytes")) { - if (resctrl_arch_is_mbm_local_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; else ret = -EINVAL; } else if (!strcmp(buf, "mbm_total_bytes")) { - if (resctrl_arch_is_mbm_total_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID; else ret = -EINVAL; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index d5bf3e0334b6..b0b1dcc36795 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -336,7 +336,7 @@ void free_rmid(u32 closid, u32 rmid) entry = __rmid_entry(idx); - if (resctrl_arch_is_llc_occupancy_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) add_rmid_to_limbo(entry); else list_add_tail(&entry->list, &rmid_free_lru); @@ -635,10 +635,10 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, * This is protected from concurrent reads from user as both * the user and overflow handler hold the global mutex. */ - if (resctrl_arch_is_mbm_total_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); - if (resctrl_arch_is_mbm_local_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); } @@ -877,6 +877,12 @@ void resctrl_enable_mon_event(enum resctrl_event_id eventid) mon_event_all[eventid].enabled = true; } +bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid) +{ + return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS && + mon_event_all[eventid].enabled; +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * @@ -912,9 +918,9 @@ int resctrl_mon_resource_init(void) RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } - if (resctrl_arch_is_mbm_local_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; - else if (resctrl_arch_is_mbm_total_enabled()) + else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; return 0; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 6a3cec92667f..4847c66fb58a 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -123,8 +123,8 @@ void rdt_staged_configs_clear(void) static bool resctrl_is_mbm_enabled(void) { - return (resctrl_arch_is_mbm_total_enabled() || - resctrl_arch_is_mbm_local_enabled()); + return (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID) || + resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)); } static bool resctrl_is_mbm_event(int e) @@ -196,7 +196,7 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && - resctrl_arch_is_llc_occupancy_enabled()) { + resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; @@ -4049,7 +4049,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); - if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) { /* * When a package is going down, forcefully * decrement rmid->ebusy. There is no way to know @@ -4085,12 +4085,12 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize; - if (resctrl_arch_is_llc_occupancy_enabled()) { + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (resctrl_arch_is_mbm_total_enabled()) { + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) { tsize = sizeof(*d->mbm_total); d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_total) { @@ -4098,7 +4098,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain return -ENOMEM; } } - if (resctrl_arch_is_mbm_local_enabled()) { + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) { tsize = sizeof(*d->mbm_local); d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); if (!d->mbm_local) { @@ -4143,7 +4143,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) RESCTRL_PICK_ANY_CPU); } - if (resctrl_arch_is_llc_occupancy_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); /* @@ -4218,7 +4218,7 @@ void resctrl_offline_cpu(unsigned int cpu) cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0, cpu); } - if (resctrl_arch_is_llc_occupancy_enabled() && + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && cpu == d->cqm_work_cpu && has_busy_rmid(d)) { cancel_delayed_work(&d->cqm_limbo); cqm_setup_limbo_handler(d, 0, cpu); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 2944042bd84c..40aba6b5d4f0 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -372,6 +372,8 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); void resctrl_enable_mon_event(enum resctrl_event_id eventid); +bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** -- Gitee From d507f74bc7d83861faab055835e39aff953a4754 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:02 -0500 Subject: [PATCH 03/35] x86/resctrl: Remove the rdt_mon_features global variable ANBZ: #20932 commit 63cc9811aa874e6fab671599ba93a989f4f93a5d upstream. rdt_mon_features is used as a bitmask of enabled monitor events. A monitor event's status is now maintained in mon_evt::enabled with all monitor events' mon_evt structures found in the filesystem's mon_event_all[] array. Remove the remaining uses of rdt_mon_features. Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/include/asm/resctrl.h | 1 - arch/x86/kernel/cpu/resctrl/core.c | 9 +++++---- arch/x86/kernel/cpu/resctrl/monitor.c | 5 ----- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 9c889f51b7f1..089742970cc1 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -42,7 +42,6 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); extern bool rdt_alloc_capable; extern bool rdt_mon_capable; -extern unsigned int rdt_mon_features; DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e655da6d4038..1809fab98687 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -863,21 +863,22 @@ static __init bool get_rdt_alloc_resources(void) static __init bool get_rdt_mon_resources(void) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + bool ret = false; if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID); - rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); + ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); - rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID); + ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); - rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID); + ret = true; } - if (!rdt_mon_features) + if (!ret) return false; return !rdt_get_mon_l3_config(r); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 0eba7c467407..9b1505975d21 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -30,11 +30,6 @@ */ bool rdt_mon_capable; -/* - * Global to indicate which monitoring events are enabled. - */ -unsigned int rdt_mon_features; - #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) static int snc_nodes_per_l3_cache = 1; -- Gitee From 74c696f1d843f15094714c9175de3785dfcf7171 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 5 Sep 2025 16:34:03 -0500 Subject: [PATCH 04/35] x86,fs/resctrl: Prepare for more monitor events ANBZ: #20932 commit 83b039877310ae1eb614eef17b780df1e10d9fb5 upstream. There's a rule in computer programming that objects appear zero, once, or many times. So code accordingly. There are two MBM events and resctrl is coded with a lot of if (local) do one thing if (total) do a different thing Change the rdt_mon_domain and rdt_hw_mon_domain structures to hold arrays of pointers to per event data instead of explicit fields for total and local bandwidth. Simplify by coding for many events using loops on which are enabled. Move resctrl_is_mbm_event() to so it can be used more widely. Also provide a for_each_mbm_event_id() helper macro. Cleanup variable names in functions touched to consistently use "eventid" for those with type enum resctrl_event_id. Signed-off-by: Tony Luck Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/core.c | 40 +++++++++++---------- arch/x86/kernel/cpu/resctrl/internal.h | 8 ++--- arch/x86/kernel/cpu/resctrl/monitor.c | 36 +++++++++---------- fs/resctrl/monitor.c | 13 ++++--- fs/resctrl/rdtgroup.c | 50 +++++++++++++------------- include/linux/resctrl.h | 23 +++++++++--- include/linux/resctrl_types.h | 3 ++ 7 files changed, 96 insertions(+), 77 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 1809fab98687..3885ec1c61ef 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -364,8 +364,10 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) { - kfree(hw_dom->arch_mbm_total); - kfree(hw_dom->arch_mbm_local); + int idx; + + for_each_mbm_idx(idx) + kfree(hw_dom->arch_mbm_states[idx]); kfree(hw_dom); } @@ -399,25 +401,27 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain * */ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { - size_t tsize; - - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) { - tsize = sizeof(*hw_dom->arch_mbm_total); - hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); - if (!hw_dom->arch_mbm_total) - return -ENOMEM; - } - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) { - tsize = sizeof(*hw_dom->arch_mbm_local); - hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); - if (!hw_dom->arch_mbm_local) { - kfree(hw_dom->arch_mbm_total); - hw_dom->arch_mbm_total = NULL; - return -ENOMEM; - } + size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]); + enum resctrl_event_id eventid; + int idx; + + for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL); + if (!hw_dom->arch_mbm_states[idx]) + goto cleanup; } return 0; +cleanup: + for_each_mbm_idx(idx) { + kfree(hw_dom->arch_mbm_states[idx]); + hw_dom->arch_mbm_states[idx] = NULL; + } + + return -ENOMEM; } static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 62f2693fd34f..30d9cac5ec72 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -57,15 +57,15 @@ struct rdt_hw_ctrl_domain { * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share * a resource for a monitor function * @d_resctrl: Properties exposed to the resctrl file system - * @arch_mbm_total: arch private state for MBM total bandwidth - * @arch_mbm_local: arch private state for MBM local bandwidth + * @arch_mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct arch_mbm_state + * indexed by RMID on x86. * * Members of this structure are accessed via helpers that provide abstraction. */ struct rdt_hw_mon_domain { struct rdt_mon_domain d_resctrl; - struct arch_mbm_state *arch_mbm_total; - struct arch_mbm_state *arch_mbm_local; + struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS]; }; static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 9b1505975d21..9376615b0d16 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -160,18 +160,14 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do u32 rmid, enum resctrl_event_id eventid) { - switch (eventid) { - case QOS_L3_OCCUP_EVENT_ID: - return NULL; - case QOS_L3_MBM_TOTAL_EVENT_ID: - return &hw_dom->arch_mbm_total[rmid]; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return &hw_dom->arch_mbm_local[rmid]; - default: - /* Never expect to get here */ - WARN_ON_ONCE(1); + struct arch_mbm_state *state; + + if (!resctrl_is_mbm_event(eventid)) return NULL; - } + + state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)]; + + return state ? &state[rmid] : NULL; } void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, @@ -200,14 +196,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) - memset(hw_dom->arch_mbm_total, 0, - sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) - memset(hw_dom->arch_mbm_local, 0, - sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); + enum resctrl_event_id eventid; + int idx; + + for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + memset(hw_dom->arch_mbm_states[idx], 0, + sizeof(*hw_dom->arch_mbm_states[0]) * r->num_rmid); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b0b1dcc36795..e0dfa5fb969e 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -346,15 +346,14 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id evtid) { u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); + struct mbm_state *state; - switch (evtid) { - case QOS_L3_MBM_TOTAL_EVENT_ID: - return &d->mbm_total[idx]; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return &d->mbm_local[idx]; - default: + if (!resctrl_is_mbm_event(evtid)) return NULL; - } + + state = d->mbm_states[MBM_STATE_IDX(evtid)]; + + return state ? &state[idx] : NULL; } static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 4847c66fb58a..45cc6f8b013f 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -127,12 +127,6 @@ static bool resctrl_is_mbm_enabled(void) resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)); } -static bool resctrl_is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); -} - /* * Trivial allocator for CLOSIDs. Use BITMAP APIs to manipulate a bitmap * of free CLOSIDs. @@ -4021,9 +4015,13 @@ static void rdtgroup_setup_default(void) static void domain_destroy_mon_state(struct rdt_mon_domain *d) { + int idx; + bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); + for_each_mbm_idx(idx) { + kfree(d->mbm_states[idx]); + d->mbm_states[idx] = NULL; + } } void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) @@ -4083,32 +4081,34 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - size_t tsize; + size_t tsize = sizeof(*d->mbm_states[0]); + enum resctrl_event_id eventid; + int idx; if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) { - tsize = sizeof(*d->mbm_total); - d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); - if (!d->mbm_total) { - bitmap_free(d->rmid_busy_llc); - return -ENOMEM; - } - } - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) { - tsize = sizeof(*d->mbm_local); - d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); - if (!d->mbm_local) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - return -ENOMEM; - } + + for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL); + if (!d->mbm_states[idx]) + goto cleanup; } return 0; +cleanup: + bitmap_free(d->rmid_busy_llc); + for_each_mbm_idx(idx) { + kfree(d->mbm_states[idx]); + d->mbm_states[idx] = NULL; + } + + return -ENOMEM; } int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 40aba6b5d4f0..478d7a935ca3 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -161,8 +161,9 @@ struct rdt_ctrl_domain { * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth + * @mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct mbm_state + * indexed by RMID on x86 or combined CLOSID, RMID on Arm. * @mbm_over: worker to periodically read MBM h/w counters * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters @@ -172,8 +173,7 @@ struct rdt_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; + struct mbm_state *mbm_states[QOS_NUM_L3_MBM_EVENTS]; struct delayed_work mbm_over; struct delayed_work cqm_limbo; int mbm_work_cpu; @@ -376,6 +376,21 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) +{ + return (eventid >= QOS_L3_MBM_TOTAL_EVENT_ID && + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + +/* Iterate over all memory bandwidth events */ +#define for_each_mbm_event_id(eventid) \ + for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID; eventid++) + +/* Iterate over memory bandwidth arrays in domain structures */ +#define for_each_mbm_idx(idx) \ + for (idx = 0; idx < QOS_NUM_L3_MBM_EVENTS; idx++) + /** * resctrl_arch_mon_event_config_write() - Write the config for an event. * @config_info: struct resctrl_mon_config_info describing the resource, domain diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index 2dadbc54e4b3..d98351663c2c 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -51,4 +51,7 @@ enum resctrl_event_id { QOS_NUM_EVENTS, }; +#define QOS_NUM_L3_MBM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1) +#define MBM_STATE_IDX(evt) ((evt) - QOS_L3_MBM_TOTAL_EVENT_ID) + #endif /* __LINUX_RESCTRL_TYPES_H */ -- Gitee From 37512552012e6c9b77d10ca637690f3d96cfcadb Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:04 -0500 Subject: [PATCH 05/35] x86/cpufeatures: Add support for Assignable Bandwidth Monitoring Counters (ABMC) ANBZ: #20932 commit e19c06219985f2beb9d71959d80f56e318abf744 upstream. Users can create as many monitor groups as RMIDs supported by the hardware. However, the bandwidth monitoring feature on AMD only guarantees that RMIDs currently assigned to a processor will be tracked by hardware. The counters of any other RMIDs which are no longer being tracked will be reset to zero. The MBM event counters return "Unavailable" for the RMIDs that are not tracked by hardware. So, there can be only limited number of groups that can give guaranteed monitoring numbers. With ever changing configurations there is no way to definitely know which of these groups are being tracked during a particular time. Users do not have the option to monitor a group or set of groups for a certain period of time without worrying about RMID being reset in between. The ABMC feature allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. There is no need to worry about counters being reset during this period. Additionally, the user can specify the type of memory transactions (e.g., reads, writes) for the counter to track. Without ABMC enabled, monitoring will work in current mode without assignment option. The Linux resctrl subsystem provides an interface that allows monitoring of up to two memory bandwidth events per group, selected from a combination of available total and local events. When ABMC is enabled, two events will be assigned to each group by default, in line with the current interface design. Users will also have the option to configure which types of memory transactions are counted by these events. Due to the limited number of available counters (32), users may quickly exhaust the available counters. If the system runs out of assignable ABMC counters, the kernel will report an error. In such cases, users will need to unassign one or more active counters to free up counters for new assignments. resctrl will provide options to assign or unassign events through the group-specific interface file. The feature is detected via CPUID_Fn80000020_EBX_x00 bit 5: ABMC (Assignable Bandwidth Monitoring Counters). The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). [ bp: Massage commit message, fixup enumeration due to VMSCAPE ] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] Signed-off-by: Qinyun Tan --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fdac919c8cc5..418178df7970 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -511,6 +511,7 @@ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index e1d8419f6e08..6534c1f9a90c 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, -- Gitee From e3c00f61cb47f5a1ecb5d9782b9facd0ba508fb6 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:05 -0500 Subject: [PATCH 06/35] x86/resctrl: Add ABMC feature in the command line options ANBZ: #20932 commit bebf57bf054b561a62f3440142b2eddab2b0bbff upstream. Add a kernel command-line parameter to enable or disable the exposure of the ABMC (Assignable Bandwidth Monitoring Counters) hardware feature to resctrl. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/admin-guide/kernel-parameters.txt | 2 +- Documentation/filesystems/resctrl.rst | 1 + arch/x86/kernel/cpu/resctrl/core.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 99d2596e0174..ae26c0b0f4fc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5538,7 +5538,7 @@ rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, - mba, smba, bmec. + mba, smba, bmec, abmc. E.g. to turn on cmt and turn off mba use: rdt=cmt,!mba diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 361b524f6f31..19f839841a96 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -26,6 +26,7 @@ MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" MBA (Memory Bandwidth Allocation) "mba" SMBA (Slow Memory Bandwidth Allocation) "" BMEC (Bandwidth Monitoring Event Configuration) "" +ABMC (Assignable Bandwidth Monitoring Counters) "" =============================================== ================================ Historically, new features were made visible by default in /proc/cpuinfo. This diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 3885ec1c61ef..f321d5fa6a53 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -710,6 +710,7 @@ enum { RDT_FLAG_MBA, RDT_FLAG_SMBA, RDT_FLAG_BMEC, + RDT_FLAG_ABMC, }; #define RDT_OPT(idx, n, f) \ @@ -735,6 +736,7 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), + RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) -- Gitee From a19d616cde49fdbfd3456e81609baf80db918a3f Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:06 -0500 Subject: [PATCH 07/35] x86,fs/resctrl: Consolidate monitoring related data from rdt_resource ANBZ: #20932 commit 5ad68c8f965fed78c61f2ac7aea933f06bb50032 upstream. The cache allocation and memory bandwidth allocation feature properties are consolidated into struct resctrl_cache and struct resctrl_membw respectively. In preparation for more monitoring properties that will clobber the existing resource struct more, re-organize the monitoring specific properties to also be in a separate structure. Also convert "bandwidth sources" terminology to "memory transactions" to have consistency within resctrl for related monitoring features. [ bp: Massage commit message. ] Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++-- arch/x86/kernel/cpu/resctrl/monitor.c | 10 +++++----- fs/resctrl/rdtgroup.c | 6 +++--- include/linux/resctrl.h | 18 +++++++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index f321d5fa6a53..78f97722fa11 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -106,7 +106,7 @@ u32 resctrl_arch_system_num_rmid_idx(void) struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */ - return r->num_rmid; + return r->mon.num_rmid; } struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) @@ -540,7 +540,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) arch_mon_domain_online(r, d); - if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { + if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { mon_domain_free(hw_dom); return; } diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 9376615b0d16..f6c2a2581008 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -129,7 +129,7 @@ static int logical_rmid_to_physical_rmid(int cpu, int lrmid) if (snc_nodes_per_l3_cache == 1) return lrmid; - return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid; + return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid; } static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) @@ -204,7 +204,7 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * continue; idx = MBM_STATE_IDX(eventid); memset(hw_dom->arch_mbm_states[idx], 0, - sizeof(*hw_dom->arch_mbm_states[0]) * r->num_rmid); + sizeof(*hw_dom->arch_mbm_states[0]) * r->mon.num_rmid); } } @@ -344,7 +344,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; - r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; + r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) @@ -359,7 +359,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - threshold = resctrl_rmid_realloc_limit / r->num_rmid; + threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid; /* * Because num_rmid may not be a power of two, round the value @@ -373,7 +373,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); - r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } r->mon_capable = true; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 45cc6f8b013f..50a3f961fce8 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1135,7 +1135,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - seq_printf(seq, "%d\n", r->num_rmid); + seq_printf(seq, "%d\n", r->mon.num_rmid); return 0; } @@ -1731,9 +1731,9 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) } /* Value from user cannot be more than the supported set of events */ - if ((val & r->mbm_cfg_mask) != val) { + if ((val & r->mon.mbm_cfg_mask) != val) { rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - r->mbm_cfg_mask); + r->mon.mbm_cfg_mask); return -EINVAL; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 478d7a935ca3..fe2af6cb96d4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -255,38 +255,46 @@ enum resctrl_schema_fmt { RESCTRL_SCHEMA_RANGE, }; +/** + * struct resctrl_mon - Monitoring related data of a resctrl resource. + * @num_rmid: Number of RMIDs available. + * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth + * monitoring events can be configured. + */ +struct resctrl_mon { + int num_rmid; + unsigned int mbm_cfg_mask; +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @num_rmid: Number of RMIDs available * @ctrl_scope: Scope of this resource for control functions * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. + * @mon: Monitoring related data. * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @schema_fmt: Which format string and parser is used for this schema. - * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth - * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - int num_rmid; enum resctrl_scope ctrl_scope; enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; + struct resctrl_mon mon; struct list_head ctrl_domains; struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - unsigned int mbm_cfg_mask; bool cdp_capable; }; -- Gitee From 925b300b50b8b4c2d6b178aa0b5beb154a4cd8dd Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:07 -0500 Subject: [PATCH 08/35] x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details ANBZ: #20932 commit 13390861b426e936db20d675804a5b405622bc79 upstream. ABMC feature details are reported via CPUID Fn8000_0020_EBX_x5. Bits Description 15:0 MAX_ABMC Maximum Supported Assignable Bandwidth Monitoring Counter ID + 1 The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). Detect the feature and number of assignable counters supported. For backward compatibility, upon detecting the assignable counter feature, enable the mbm_total_bytes and mbm_local_bytes events that users are familiar with as part of original L3 MBM support. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/core.c | 7 +++++-- arch/x86/kernel/cpu/resctrl/monitor.c | 11 ++++++++--- fs/resctrl/monitor.c | 7 +++++++ include/linux/resctrl.h | 4 ++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 78f97722fa11..9edf803964cd 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -883,6 +883,8 @@ static __init bool get_rdt_mon_resources(void) resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); ret = true; } + if (rdt_cpu_has(X86_FEATURE_ABMC)) + ret = true; if (!ret) return false; @@ -979,7 +981,7 @@ static enum cpuhp_state rdt_online; /* Runs once on the BSP during boot. */ void resctrl_cpu_detect(struct cpuinfo_x86 *c) { - if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + if (!cpu_has(c, X86_FEATURE_CQM_LLC) && !cpu_has(c, X86_FEATURE_ABMC)) { c->x86_cache_max_rmid = -1; c->x86_cache_occ_scale = -1; c->x86_cache_mbm_width_offset = -1; @@ -991,7 +993,8 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || - cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL) || + cpu_has(c, X86_FEATURE_ABMC)) { u32 eax, ebx, ecx, edx; /* QoS sub-leaf, EAX=0Fh, ECX=1 */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f6c2a2581008..82fce4738e49 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -339,6 +339,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; + u32 eax, ebx, ecx, edx; snc_nodes_per_l3_cache = snc_get_config(); @@ -368,14 +369,18 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); - if (rdt_cpu_has(X86_FEATURE_BMEC)) { - u32 eax, ebx, ecx, edx; - + if (rdt_cpu_has(X86_FEATURE_BMEC) || rdt_cpu_has(X86_FEATURE_ABMC)) { /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } + if (rdt_cpu_has(X86_FEATURE_ABMC)) { + r->mon.mbm_cntr_assignable = true; + cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); + r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; + } + r->mon_capable = true; return 0; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index e0dfa5fb969e..b578451de2b5 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -922,6 +922,13 @@ int resctrl_mon_resource_init(void) else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; + if (r->mon.mbm_cntr_assignable) { + if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); + if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); + } + return 0; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index fe2af6cb96d4..eb80cc233be4 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -260,10 +260,14 @@ enum resctrl_schema_fmt { * @num_rmid: Number of RMIDs available. * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth * monitoring events can be configured. + * @num_mbm_cntrs: Number of assignable counters. + * @mbm_cntr_assignable:Is system capable of supporting counter assignment? */ struct resctrl_mon { int num_rmid; unsigned int mbm_cfg_mask; + int num_mbm_cntrs; + bool mbm_cntr_assignable; }; /** -- Gitee From b3924b767441dfab05007e50c97b856444a53663 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:08 -0500 Subject: [PATCH 09/35] x86/resctrl: Add support to enable/disable AMD ABMC feature ANBZ: #20932 commit faebbc58cde9d8f6050ac152c34c88195ed4abaa upstream. Add the functionality to enable/disable the AMD ABMC feature. The AMD ABMC feature is enabled by setting enabled bit(0) in the L3_QOS_EXT_CFG MSR. When the state of ABMC is changed, the MSR needs to be updated on all the logical processors in the QOS Domain. Hardware counters will reset when ABMC state is changed. [ bp: Massage commit message. ] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] Signed-off-by: Qinyun Tan --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/resctrl/internal.h | 5 +++ arch/x86/kernel/cpu/resctrl/monitor.c | 45 ++++++++++++++++++++++++++ include/linux/resctrl.h | 20 ++++++++++++ 4 files changed, 71 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5afb07c04669..d23511aa22c5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1194,6 +1194,7 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff #define MSR_IA32_EVT_CFG_BASE 0xc0000400 /* MSR_IA32_VMX_MISC bits */ diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 30d9cac5ec72..71a9cc0307b9 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -40,6 +40,9 @@ struct arch_mbm_state { u64 prev_msr; }; +/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */ +#define ABMC_ENABLE_BIT 0 + /** * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share * a resource for a control function @@ -105,6 +108,7 @@ struct msr_param { * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource + * @mbm_cntr_assign_enabled: ABMC feature is enabled * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -118,6 +122,7 @@ struct rdt_hw_resource { unsigned int mon_scale; unsigned int mbm_width; bool cdp_enabled; + bool mbm_cntr_assign_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 82fce4738e49..c46cbf7cec26 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -399,3 +399,48 @@ void __init intel_rdt_mbm_apply_quirk(void) mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold; mbm_cf = mbm_cf_table[cf_index].cf; } + +static void resctrl_abmc_set_one_amd(void *arg) +{ + bool *enable = arg; + + if (*enable) + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); + else + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); +} + +/* + * ABMC enable/disable requires update of L3_QOS_EXT_CFG MSR on all the CPUs + * associated with all monitor domains. + */ +static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_mon_domain *d; + + lockdep_assert_cpus_held(); + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd, + &enable, 1); + resctrl_arch_reset_rmid_all(r, d); + } +} + +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (r->mon.mbm_cntr_assignable && + hw_res->mbm_cntr_assign_enabled != enable) { + _resctrl_abmc_enable(r, enable); + hw_res->mbm_cntr_assign_enabled = enable; + } + + return 0; +} + +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->mbm_cntr_assign_enabled; +} diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index eb80cc233be4..919806122c50 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -445,6 +445,26 @@ static inline u32 resctrl_get_config_index(u32 closid, bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); +/** + * resctrl_arch_mbm_cntr_assign_enabled() - Check if MBM counter assignment + * mode is enabled. + * @r: Pointer to the resource structure. + * + * Return: + * true if the assignment mode is enabled, false otherwise. + */ +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r); + +/** + * resctrl_arch_mbm_cntr_assign_set() - Configure the MBM counter assignment mode. + * @r: Pointer to the resource structure. + * @enable: Set to true to enable, false to disable the assignment mode. + * + * Return: + * 0 on success, < 0 on error. + */ +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. -- Gitee From 5f0091f84bf9d2836da70b5ff30737ebd67babd5 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:09 -0500 Subject: [PATCH 10/35] fs/resctrl: Introduce the interface to display monitoring modes ANBZ: #20932 commit 3b497c3f4f0427d940ec5c8600e840c8adc5cfbf upstream. Introduce the resctrl file "mbm_assign_mode" to list the supported counter assignment modes. The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Each event within a resctrl group can be assigned independently in this mode. On AMD systems "mbm_event" mode is backed by the ABMC (Assignable Bandwidth Monitoring Counters) hardware feature and is enabled by default. The "default" mode is the existing mode that works without the explicit counter assignment, instead relying on dynamic counter assignment by hardware that may result in hardware not dedicating a counter resulting in monitoring data reads returning "Unavailable". Provide an interface to display the monitor modes on the system. $ cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode [mbm_event] default Add IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED) check to support Arm64. On x86, CONFIG_RESCTRL_ASSIGN_FIXED is not defined. On Arm64, it will be defined when the "mbm_event" mode is supported. Add IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED) check early to ensure the user interface remains compatible with upcoming Arm64 support. IS_ENABLED() safely evaluates to 0 when the configuration is not defined. As a result, for MPAM, the display would be either: [default] or [mbm_event] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 31 +++++++++++++++++++++++++++ fs/resctrl/internal.h | 4 ++++ fs/resctrl/monitor.c | 30 ++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 9 +++++++- 4 files changed, 73 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 19f839841a96..bbf2d0fb8f06 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -257,6 +257,37 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config 0=0x30;1=0x30;3=0x15;4=0x15 +"mbm_assign_mode": + The supported counter assignment modes. The enclosed brackets indicate which mode + is enabled. + :: + + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + [mbm_event] + default + + "mbm_event": + + mbm_event mode allows users to assign a hardware counter to an RMID, event + pair and monitor the bandwidth usage as long as it is assigned. The hardware + continues to track the assigned counter until it is explicitly unassigned by + the user. Each event within a resctrl group can be assigned independently. + + In this mode, a monitoring event can only accumulate data while it is backed + by a hardware counter. Use "mbm_L3_assignments" found in each CTRL_MON and MON + group to specify which of the events should have a counter assigned. The number + of counters available is described in the "num_mbm_cntrs" file. Changing the + mode may cause all counters on the resource to reset. + + "default": + + In default mode, resctrl assumes there is a hardware counter for each + event within every CTRL_MON and MON group. On AMD platforms, it is + recommended to use the mbm_event mode, if supported, to prevent reset of MBM + events between reads resulting from hardware re-allocating counters. This can + result in misleading values or display "Unavailable" if no counter is assigned + to the event. + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 7a57366d1abc..78aeb7ea38af 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -382,6 +382,10 @@ bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); +void *rdt_kn_parent_priv(struct kernfs_node *kn); + +int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b578451de2b5..96231d517e71 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -882,6 +882,36 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid) mon_event_all[eventid].enabled; } +int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + bool enabled; + + mutex_lock(&rdtgroup_mutex); + enabled = resctrl_arch_mbm_cntr_assign_enabled(r); + + if (r->mon.mbm_cntr_assignable) { + if (enabled) + seq_puts(s, "[mbm_event]\n"); + else + seq_puts(s, "[default]\n"); + + if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) { + if (enabled) + seq_puts(s, "default\n"); + else + seq_puts(s, "mbm_event\n"); + } + } else { + seq_puts(s, "[default]\n"); + } + + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 50a3f961fce8..c7e12059a86c 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -975,7 +975,7 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, return 0; } -static void *rdt_kn_parent_priv(struct kernfs_node *kn) +void *rdt_kn_parent_priv(struct kernfs_node *kn) { /* * The parent pointer is only valid within RCU section since it can be @@ -1911,6 +1911,13 @@ static struct rftype res_common_files[] = { .seq_show = mbm_local_bytes_config_show, .write = mbm_local_bytes_config_write, }, + { + .name = "mbm_assign_mode", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_mbm_assign_mode_show, + .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, + }, { .name = "cpus", .mode = 0644, -- Gitee From ef4261baae39ca70e40c4391c1cf46325bbd0e56 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:10 -0500 Subject: [PATCH 11/35] fs/resctrl: Add resctrl file to display number of assignable counters ANBZ: #20932 commit 8c793336eaf8893a29626155d74615fe9f03e7f2 upstream. The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Create 'num_mbm_cntrs' resctrl file that displays the number of counters supported in each domain. 'num_mbm_cntrs' is only visible to user space when the system supports "mbm_event" mode. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 11 +++++++++++ fs/resctrl/internal.h | 2 ++ fs/resctrl/monitor.c | 26 ++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 6 ++++++ 4 files changed, 45 insertions(+) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index bbf2d0fb8f06..1d592e91e34b 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -288,6 +288,17 @@ with the following files: result in misleading values or display "Unavailable" if no counter is assigned to the event. +"num_mbm_cntrs": + The maximum number of counters (total of available and assigned counters) in + each domain when the system supports mbm_event mode. + + For example, on a system with maximum of 32 memory bandwidth monitoring + counters in each of its L3 domains: + :: + + # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs + 0=32;1=32 + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 78aeb7ea38af..7a12187eced8 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -386,6 +386,8 @@ void *rdt_kn_parent_priv(struct kernfs_node *kn); int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v); +int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 96231d517e71..112979e9c444 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -912,6 +912,30 @@ int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, return 0; } +int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + struct rdt_mon_domain *dom; + bool sep = false; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + list_for_each_entry(dom, &r->mon_domains, hdr.list) { + if (sep) + seq_putc(s, ';'); + + seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs); + sep = true; + } + seq_putc(s, '\n'); + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + return 0; +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * @@ -957,6 +981,8 @@ int resctrl_mon_resource_init(void) resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); + resctrl_file_fflags_init("num_mbm_cntrs", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } return 0; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index c7e12059a86c..8fb6ec661d94 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1836,6 +1836,12 @@ static struct rftype res_common_files[] = { .seq_show = rdt_default_ctrl_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, + { + .name = "num_mbm_cntrs", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_num_mbm_cntrs_show, + }, { .name = "min_cbm_bits", .mode = 0444, -- Gitee From e634615fcbb6d751520870b1839752879038eee4 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:11 -0500 Subject: [PATCH 12/35] fs/resctrl: Introduce mbm_cntr_cfg to track assignable counters per domain ANBZ: #20932 commit 4d32c24a74f2c12ff440d381ba01de574f6631ce upstream. The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Counters are assigned/unassigned at monitoring domain level. Manage a monitoring domain's hardware counters using a per monitoring domain array of struct mbm_cntr_cfg that is indexed by the hardware counter ID. A hardware counter's configuration contains the MBM event ID and points to the monitoring group that it is assigned to, with a NULL pointer meaning that the hardware counter is available for assignment. There is no direct way to determine which hardware counters are assigned to a particular monitoring group. Check every entry of every hardware counter configuration array in every monitoring domain to query which MBM events of a monitoring group is tracked by hardware. Such queries are acceptable because of a very small number of assignable counters (32 to 64). Suggested-by: Peter Newman Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/rdtgroup.c | 8 ++++++++ include/linux/resctrl.h | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 8fb6ec661d94..3af905ffd485 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -4030,6 +4030,7 @@ static void domain_destroy_mon_state(struct rdt_mon_domain *d) { int idx; + kfree(d->cntr_cfg); bitmap_free(d->rmid_busy_llc); for_each_mbm_idx(idx) { kfree(d->mbm_states[idx]); @@ -4113,6 +4114,13 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain goto cleanup; } + if (resctrl_is_mbm_enabled() && r->mon.mbm_cntr_assignable) { + tsize = sizeof(*d->cntr_cfg); + d->cntr_cfg = kcalloc(r->mon.num_mbm_cntrs, tsize, GFP_KERNEL); + if (!d->cntr_cfg) + goto cleanup; + } + return 0; cleanup: bitmap_free(d->rmid_busy_llc); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 919806122c50..e013caba6641 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -156,6 +156,18 @@ struct rdt_ctrl_domain { u32 *mbps_val; }; +/** + * struct mbm_cntr_cfg - Assignable counter configuration. + * @evtid: MBM event to which the counter is assigned. Only valid + * if @rdtgroup is not NULL. + * @rdtgrp: resctrl group assigned to the counter. NULL if the + * counter is free. + */ +struct mbm_cntr_cfg { + enum resctrl_event_id evtid; + struct rdtgroup *rdtgrp; +}; + /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types @@ -168,6 +180,8 @@ struct rdt_ctrl_domain { * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters + * @cntr_cfg: array of assignable counters' configuration (indexed + * by counter ID) */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; @@ -178,6 +192,7 @@ struct rdt_mon_domain { struct delayed_work cqm_limbo; int mbm_work_cpu; int cqm_work_cpu; + struct mbm_cntr_cfg *cntr_cfg; }; /** -- Gitee From b6c9119fb99ca633e3ee89f0087c8523b35cd525 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:12 -0500 Subject: [PATCH 13/35] fs/resctrl: Introduce interface to display number of free MBM counters ANBZ: #20932 commit 16ff6b038fb3b64aa033efdc95d673239610e1a6 upstream. Introduce the "available_mbm_cntrs" resctrl file to display the number of counters available for assignment in each domain when "mbm_event" mode is enabled. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 11 +++++++ fs/resctrl/internal.h | 3 ++ fs/resctrl/monitor.c | 44 +++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 6 ++++ 4 files changed, 64 insertions(+) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 1d592e91e34b..5a32ed287ad4 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -299,6 +299,17 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs 0=32;1=32 +"available_mbm_cntrs": + The number of counters available for assignment in each domain when mbm_event + mode is enabled on the system. + + For example, on a system with 30 available [hardware] assignable counters + in each of its L3 domains: + :: + + # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs + 0=30;1=30 + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 7a12187eced8..4f372e80bf37 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -388,6 +388,9 @@ int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v); +int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, + void *v); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 112979e9c444..1fa82a62b2e5 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -936,6 +936,48 @@ int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, return 0; } +int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + struct rdt_mon_domain *dom; + bool sep = false; + u32 cntrs, i; + int ret = 0; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + list_for_each_entry(dom, &r->mon_domains, hdr.list) { + if (sep) + seq_putc(s, ';'); + + cntrs = 0; + for (i = 0; i < r->mon.num_mbm_cntrs; i++) { + if (!dom->cntr_cfg[i].rdtgrp) + cntrs++; + } + + seq_printf(s, "%d=%u", dom->hdr.id, cntrs); + sep = true; + } + seq_putc(s, '\n'); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret; +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * @@ -983,6 +1025,8 @@ int resctrl_mon_resource_init(void) resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + resctrl_file_fflags_init("available_mbm_cntrs", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } return 0; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 3af905ffd485..34a95f6f622d 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1822,6 +1822,12 @@ static struct rftype res_common_files[] = { .seq_show = rdt_mon_features_show, .fflags = RFTYPE_MON_INFO, }, + { + .name = "available_mbm_cntrs", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_available_mbm_cntrs_show, + }, { .name = "num_rmids", .mode = 0444, -- Gitee From db37e219df420af649641d5abf898242246acb12 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:13 -0500 Subject: [PATCH 14/35] x86/resctrl: Add data structures and definitions for ABMC assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #20932 commit 84ecefb766748916099f5b7444a973a623611d63 upstream. The ABMC feature allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. The ABMC feature implements an MSR L3_QOS_ABMC_CFG (C000_03FDh). ABMC counter assignment is done by setting the counter id, bandwidth source (RMID) and bandwidth configuration. Attempts to read or write the MSR when ABMC is not enabled will result in a #GP(0) exception. Introduce the data structures and definitions for MSR L3_QOS_ABMC_CFG (0xC000_03FDh): ========================================================================= Bits Mnemonic Description Access Reset Type Value ========================================================================= 63 CfgEn Configuration Enable R/W 0 62 CtrEn Enable/disable counting R/W 0 61:53 – Reserved MBZ 0 52:48 CtrID Counter Identifier R/W 0 47 IsCOS BwSrc field is a CLOSID R/W 0 (not an RMID) 46:44 – Reserved MBZ 0 43:32 BwSrc Bandwidth Source R/W 0 (RMID or CLOSID) 31:0 BwType Bandwidth configuration R/W 0 tracked by the CtrID ========================================================================== The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). [ bp: Touchups. ] Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] Signed-off-by: Qinyun Tan --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/resctrl/internal.h | 36 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d23511aa22c5..45ce1175cfbe 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1194,6 +1194,7 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd #define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff #define MSR_IA32_EVT_CFG_BASE 0xc0000400 diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 71a9cc0307b9..63166be49ea6 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -167,6 +167,42 @@ union cpuid_0x10_x_edx { unsigned int full; }; +/* + * ABMC counters are configured by writing to MSR_IA32_L3_QOS_ABMC_CFG. + * + * @bw_type : Event configuration that represents the memory + * transactions being tracked by the @cntr_id. + * @bw_src : Bandwidth source (RMID or CLOSID). + * @reserved1 : Reserved. + * @is_clos : @bw_src field is a CLOSID (not an RMID). + * @cntr_id : Counter identifier. + * @reserved : Reserved. + * @cntr_en : Counting enable bit. + * @cfg_en : Configuration enable bit. + * + * Configuration and counting: + * Counter can be configured across multiple writes to MSR. Configuration + * is applied only when @cfg_en = 1. Counter @cntr_id is reset when the + * configuration is applied. + * @cfg_en = 1, @cntr_en = 0 : Apply @cntr_id configuration but do not + * count events. + * @cfg_en = 1, @cntr_en = 1 : Apply @cntr_id configuration and start + * counting events. + */ +union l3_qos_abmc_cfg { + struct { + unsigned long bw_type :32, + bw_src :12, + reserved1: 3, + is_clos : 1, + cntr_id : 5, + reserved : 9, + cntr_en : 1, + cfg_en : 1; + } split; + unsigned long full; +}; + void rdt_ctrl_update(void *arg); int rdt_get_mon_l3_config(struct rdt_resource *r); -- Gitee From 9a569b37515c694b0bdbbefb2b2b992b271f8135 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:14 -0500 Subject: [PATCH 15/35] fs/resctrl: Introduce event configuration field in struct mon_evt ANBZ: #20932 commit ebebda853633de389ba2c6737f8ca38405713e90 upstream. When supported, mbm_event counter assignment mode allows the user to configure events to track specific types of memory transactions. Introduce an evt_cfg field in struct mon_evt to define the type of memory transactions tracked by a monitoring event. Also add a helper function to get the evt_cfg value. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/internal.h | 5 +++++ fs/resctrl/monitor.c | 10 ++++++++++ include/linux/resctrl.h | 2 ++ 3 files changed, 17 insertions(+) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 4f372e80bf37..1cddfff007a2 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -56,6 +56,10 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) * @evtid: event id * @rid: resource id for this event * @name: name of the event + * @evt_cfg: Event configuration value that represents the + * memory transactions (e.g., READS_TO_LOCAL_MEM, + * READS_TO_REMOTE_MEM) being tracked by @evtid. + * Only valid if @evtid is an MBM event. * @configurable: true if the event is configurable * @enabled: true if the event is enabled */ @@ -63,6 +67,7 @@ struct mon_evt { enum resctrl_event_id evtid; enum resctrl_res_level rid; char *name; + u32 evt_cfg; bool configurable; bool enabled; }; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 1fa82a62b2e5..f714e7baaea6 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -882,6 +882,11 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid) mon_event_all[eventid].enabled; } +u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid) +{ + return mon_event_all[evtid].evt_cfg; +} + int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { @@ -1023,6 +1028,11 @@ int resctrl_mon_resource_init(void) resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); resctrl_file_fflags_init("available_mbm_cntrs", diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index e013caba6641..87daa4ca312d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -409,6 +409,8 @@ static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); } +u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id eventid); + /* Iterate over all memory bandwidth events */ #define for_each_mbm_event_id(eventid) \ for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ -- Gitee From bbf51f0fda41cb9b2a7f07f64daf89111f39e41d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:15 -0500 Subject: [PATCH 16/35] x86,fs/resctrl: Implement resctrl_arch_config_cntr() to assign a counter with ABMC ANBZ: #20932 commit f7a4fb22312646329ba21bc58958fd83fb9fc15d upstream. The ABMC feature allows users to assign a hardware counter to an RMID, event pair and monitor bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Implement an x86 architecture-specific handler to configure a counter. This architecture specific handler is called by resctrl fs when a counter is assigned or unassigned as well as when an already assigned counter's configuration should be updated. Configure counters by writing to the L3_QOS_ABMC_CFG MSR, specifying the counter ID, bandwidth source (RMID), and event configuration. The ABMC feature details are documented in APM [1] available from [2]. [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth Monitoring (ABMC). Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/monitor.c | 36 +++++++++++++++++++++++++++ include/linux/resctrl.h | 19 ++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c46cbf7cec26..857e9075c455 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -444,3 +444,39 @@ bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) { return resctrl_to_arch_res(r)->mbm_cntr_assign_enabled; } + +static void resctrl_abmc_config_one_amd(void *info) +{ + union l3_qos_abmc_cfg *abmc_cfg = info; + + wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, abmc_cfg->full); +} + +/* + * Send an IPI to the domain to assign the counter to RMID, event pair. + */ +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) +{ + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + union l3_qos_abmc_cfg abmc_cfg = { 0 }; + struct arch_mbm_state *am; + + abmc_cfg.split.cfg_en = 1; + abmc_cfg.split.cntr_en = assign ? 1 : 0; + abmc_cfg.split.cntr_id = cntr_id; + abmc_cfg.split.bw_src = rmid; + if (assign) + abmc_cfg.split.bw_type = resctrl_get_mon_evt_cfg(evtid); + + smp_call_function_any(&d->hdr.cpu_mask, resctrl_abmc_config_one_amd, &abmc_cfg, 1); + + /* + * The hardware counter is reset (because cfg_en == 1) so there is no + * need to record initial non-zero counts. + */ + am = get_arch_mbm_state(hw_dom, rmid, evtid); + if (am) + memset(am, 0, sizeof(*am)); +} diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 87daa4ca312d..50e38445183a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -594,6 +594,25 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * */ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); +/** + * resctrl_arch_config_cntr() - Configure the counter with its new RMID + * and event details. + * @r: Resource structure. + * @d: The domain in which counter with ID @cntr_id should be configured. + * @evtid: Monitoring event type (e.g., QOS_L3_MBM_TOTAL_EVENT_ID + * or QOS_L3_MBM_LOCAL_EVENT_ID). + * @rmid: RMID. + * @closid: CLOSID. + * @cntr_id: Counter ID to configure. + * @assign: True to assign the counter or update an existing assignment, + * false to unassign the counter. + * + * This can be called from any CPU. + */ +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- Gitee From a5dce010d0f6f547ce52c6a2088756d13e42b6bf Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:16 -0500 Subject: [PATCH 17/35] fs/resctrl: Add the functionality to assign MBM events ANBZ: #20932 commit bd85310efd71b9e7809e1b95fe7a60fde42e62db upstream. When supported, "mbm_event" counter assignment mode offers "num_mbm_cntrs" number of counters that can be assigned to RMID, event pairs and monitor bandwidth usage as long as it is assigned. Add the functionality to allocate and assign a counter to an RMID, event pair in the domain. Also, add the helper rdtgroup_assign_cntrs() to assign counters in the group. Log the error message "Failed to allocate counter for in domain " in /sys/fs/resctrl/info/last_cmd_status if all the counters are in use. Exit on the first failure when assigning counters across all the domains. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/internal.h | 2 + fs/resctrl/monitor.c | 156 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 1cddfff007a2..762705d7eb8d 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -396,6 +396,8 @@ int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v); +void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index f714e7baaea6..106e9bdb8a9d 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -356,6 +356,55 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, return state ? &state[idx] : NULL; } +/* + * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp. + * + * Return: + * Valid counter ID on success, or -ENOENT on failure. + */ +static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) +{ + int cntr_id; + + if (!r->mon.mbm_cntr_assignable) + return -ENOENT; + + if (!resctrl_is_mbm_event(evtid)) + return -ENOENT; + + for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) { + if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp && + d->cntr_cfg[cntr_id].evtid == evtid) + return cntr_id; + } + + return -ENOENT; +} + +/* + * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d. + * Caller must ensure that the specified event is not assigned already. + * + * Return: + * Valid counter ID on success, or -ENOSPC on failure. + */ +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) +{ + int cntr_id; + + for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) { + if (!d->cntr_cfg[cntr_id].rdtgrp) { + d->cntr_cfg[cntr_id].rdtgrp = rdtgrp; + d->cntr_cfg[cntr_id].evtid = evtid; + return cntr_id; + } + } + + return -ENOSPC; +} + static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); @@ -887,6 +936,113 @@ u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid) return mon_event_all[evtid].evt_cfg; } +/* + * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID + * pair in the domain. + * + * Assign the counter if @assign is true else unassign the counter. Reset the + * associated non-architectural state. + */ +static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) +{ + struct mbm_state *m; + + resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign); + + m = get_mbm_state(d, closid, rmid, evtid); + if (m) + memset(m, 0, sizeof(*m)); +} + +/* + * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event + * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d. + * + * Return: + * 0 on success, < 0 on failure. + */ +static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, struct mon_evt *mevt) +{ + int cntr_id; + + /* No action required if the counter is assigned already. */ + cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid); + if (cntr_id >= 0) + return 0; + + cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid); + if (cntr_id < 0) { + rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n", + mevt->name, d->hdr.id); + return cntr_id; + } + + rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true); + + return 0; +} + +/* + * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in + * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is + * NULL; otherwise, assign the counter to the specified domain @d. + * + * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr() + * will fail. The assignment process will abort at the first failure encountered + * during domain traversal, which may result in the event being only partially + * assigned. + * + * Return: + * 0 on success, < 0 on failure. + */ +static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + struct mon_evt *mevt) +{ + struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); + int ret = 0; + + if (!d) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { + ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt); + if (ret) + return ret; + } + } else { + ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt); + } + + return ret; +} + +/* + * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when + * a new group is created. + * + * Each group can accommodate two counters per domain: one for the total + * event and one for the local event. Assignments may fail due to the limited + * number of counters. However, it is not necessary to fail the group creation + * and thus no failure is returned. Users have the option to modify the + * counter assignments after the group has been created. + */ +void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r)) + return; + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + rdtgroup_assign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]); + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + rdtgroup_assign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]); +} + int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { -- Gitee From 5e865f3b1d3466ce4645de7f3537a478c6cc81e4 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:17 -0500 Subject: [PATCH 18/35] fs/resctrl: Add the functionality to unassign MBM events ANBZ: #20932 commit aab2c5088cdb26e80d51ffbe72d24ab23fa1533e upstream. The "mbm_event" counter assignment mode offers "num_mbm_cntrs" number of counters that can be assigned to RMID, event pairs and monitor bandwidth usage as long as it is assigned. If all the counters are in use, the kernel logs the error message "Failed to allocate counter for in domain " in /sys/fs/resctrl/info/last_cmd_status when a new assignment is requested. To make space for a new assignment, users must unassign an already assigned counter and retry the assignment again. Add the functionality to unassign and free the counters in the domain. Also, add the helper rdtgroup_unassign_cntrs() to unassign counters in the group. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/internal.h | 2 ++ fs/resctrl/monitor.c | 66 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 762705d7eb8d..c6b66d4a6a37 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -398,6 +398,8 @@ int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_fil void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp); +void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 106e9bdb8a9d..2ed29ae831a4 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -405,6 +405,14 @@ static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, return -ENOSPC; } +/* + * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d. + */ +static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id) +{ + memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg)); +} + static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); @@ -1043,6 +1051,64 @@ void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]); } +/* + * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration + * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp. + */ +static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, struct mon_evt *mevt) +{ + int cntr_id; + + cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid); + + /* If there is no cntr_id assigned, nothing to do */ + if (cntr_id < 0) + return; + + rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false); + + mbm_cntr_free(d, cntr_id); +} + +/* + * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with + * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign + * the counters from all the domains if @d is NULL else unassign from @d. + */ +static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + struct mon_evt *mevt) +{ + struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); + + if (!d) { + list_for_each_entry(d, &r->mon_domains, hdr.list) + rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt); + } else { + rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt); + } +} + +/* + * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events. + * Called when a group is deleted. + */ +void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r)) + return; + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + rdtgroup_unassign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]); + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + rdtgroup_unassign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]); +} + int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { -- Gitee From 9f2c4f62542263434b4d9c60ce7f1001aafa7aed Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:18 -0500 Subject: [PATCH 19/35] fs/resctrl: Pass struct rdtgroup instead of individual members ANBZ: #20932 commit bc53eea6c2a1dea152a0073a2f2814b697ad197e upstream. Reading monitoring data for a monitoring group requires both the RMID and CLOSID. The RMID and CLOSID are members of struct rdtgroup but passed separately to several functions involved in retrieving event data. When "mbm_event" counter assignment mode is enabled, a counter ID is required to read event data. The counter ID is obtained through mbm_cntr_get(), which expects a struct rdtgroup pointer. Provide a pointer to the struct rdtgroup as parameter to functions involved in retrieving event data to simplify access to RMID, CLOSID, and counter ID. Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/monitor.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 2ed29ae831a4..c815153cad07 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -413,9 +413,11 @@ static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id) memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg)); } -static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) +static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { int cpu = smp_processor_id(); + u32 closid = rdtgrp->closid; + u32 rmid = rdtgrp->mon.rmid; struct rdt_mon_domain *d; struct mbm_state *m; int err, ret; @@ -475,8 +477,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) /* * mbm_bw_count() - Update bw count from values previously read by * __mon_event_count(). - * @closid: The closid used to identify the cached mbm_state. - * @rmid: The rmid used to identify the cached mbm_state. + * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify + * the cached mbm_state. * @rr: The struct rmid_read populated by __mon_event_count(). * * Supporting function to calculate the memory bandwidth @@ -484,9 +486,11 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) * __mon_event_count() is compared with the chunks value from the previous * invocation. This must be called once per second to maintain values in MBps. */ -static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) +static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { u64 cur_bw, bytes, cur_bytes; + u32 closid = rdtgrp->closid; + u32 rmid = rdtgrp->mon.rmid; struct mbm_state *m; m = get_mbm_state(rr->d, closid, rmid, rr->evtid); @@ -515,7 +519,7 @@ void mon_event_count(void *info) rdtgrp = rr->rgrp; - ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr); + ret = __mon_event_count(rdtgrp, rr); /* * For Ctrl groups read data from child monitor groups and @@ -526,8 +530,7 @@ void mon_event_count(void *info) if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->closid, entry->mon.rmid, - rr) == 0) + if (__mon_event_count(entry, rr) == 0) ret = 0; } } @@ -658,7 +661,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) } static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d, - u32 closid, u32 rmid, enum resctrl_event_id evtid) + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rmid_read rr = {0}; @@ -672,30 +675,30 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain * return; } - __mon_event_count(closid, rmid, &rr); + __mon_event_count(rdtgrp, &rr); /* * If the software controller is enabled, compute the * bandwidth for this event id. */ if (is_mba_sc(NULL)) - mbm_bw_count(closid, rmid, &rr); + mbm_bw_count(rdtgrp, &rr); resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); } static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, - u32 closid, u32 rmid) + struct rdtgroup *rdtgrp) { /* * This is protected from concurrent reads from user as both * the user and overflow handler hold the global mutex. */ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) - mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); + mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID); if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) - mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); + mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID); } /* @@ -768,11 +771,11 @@ void mbm_handle_overflow(struct work_struct *work) d = container_of(work, struct rdt_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(r, d, prgrp->closid, prgrp->mon.rmid); + mbm_update(r, d, prgrp); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(r, d, crgrp->closid, crgrp->mon.rmid); + mbm_update(r, d, crgrp); if (is_mba_sc(NULL)) update_mba_bw(prgrp, d); -- Gitee From fb1a446a6d03df1474a3237105da36d4cdef85ea Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:19 -0500 Subject: [PATCH 20/35] fs/resctrl: Introduce counter ID read, reset calls in mbm_event mode ANBZ: #20932 commit 862314fd1f93d96eddb0559a807c66cb1f6ee520 upstream. When supported, "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor the bandwidth usage as long as it is assigned. The hardware continues to track the assigned counter until it is explicitly unassigned by the user. Introduce the architecture calls resctrl_arch_cntr_read() and resctrl_arch_reset_cntr() to read and reset event counters when "mbm_event" mode is supported. Function names match existing resctrl_arch_rmid_read() and resctrl_arch_reset_rmid(). Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- include/linux/resctrl.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 50e38445183a..04152654827d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -613,6 +613,44 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign); +/** + * resctrl_arch_cntr_read() - Read the event data corresponding to the counter ID + * assigned to the RMID, event pair for this resource + * and domain. + * @r: Resource that the counter should be read from. + * @d: Domain that the counter should be read from. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to read. + * @eventid: The MBM event to which @cntr_id is assigned. + * @val: Result of the counter read in bytes. + * + * Called on a CPU that belongs to domain @d when "mbm_event" mode is enabled. + * Called from a non-migrateable process context via smp_call_on_cpu() unless all + * CPUs are nohz_full, in which case it is called via IPI (smp_call_function_any()). + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_cntr() - Reset any private state associated with counter ID. + * @r: The domain's resource. + * @d: The counter ID's domain. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to reset. + * @eventid: The MBM event to which @cntr_id is assigned. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- Gitee From 5cce7c65ec63e07a71b71d303568ca28eecf8833 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:20 -0500 Subject: [PATCH 21/35] x86/resctrl: Refactor resctrl_arch_rmid_read() ANBZ: #20932 commit 7c9ac605e202c4668e441fc8146a993577131ca1 upstream. resctrl_arch_rmid_read() adjusts the value obtained from MSR_IA32_QM_CTR to account for the overflow for MBM events and apply counter scaling for all the events. This logic is common to both reading an RMID and reading a hardware counter directly. Refactor the hardware value adjustment logic into get_corrected_val() to prepare for support of reading a hardware counter. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/monitor.c | 38 ++++++++++++++++----------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 857e9075c455..51fcd99453e7 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -216,24 +216,13 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, - u32 unused, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *ignored) +static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 msr_val) { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; - u64 msr_val, chunks; - u32 prmid; - int ret; - - resctrl_arch_rmid_read_context_check(); - - prmid = logical_rmid_to_physical_rmid(cpu, rmid); - ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; + u64 chunks; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { @@ -245,7 +234,26 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, chunks = msr_val; } - *val = chunks * hw_res->mon_scale; + return chunks * hw_res->mon_scale; +} + +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *ignored) +{ + int cpu = cpumask_any(&d->hdr.cpu_mask); + u64 msr_val; + u32 prmid; + int ret; + + resctrl_arch_rmid_read_context_check(); + + prmid = logical_rmid_to_physical_rmid(cpu, rmid); + ret = __rmid_read_phys(prmid, eventid, &msr_val); + if (ret) + return ret; + + *val = get_corrected_val(r, d, rmid, eventid, msr_val); return 0; } -- Gitee From 13371c0d62ab2cf13e28796c848343007c30d832 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:21 -0500 Subject: [PATCH 22/35] x86/resctrl: Implement resctrl_arch_reset_cntr() and resctrl_arch_cntr_read() ANBZ: #20932 commit 2a65b72c1603a74f35228acbb8de2ecff9c13efe upstream. System software reads resctrl event data for a particular resource by writing the RMID and Event Identifier (EvtID) to the QM_EVTSEL register and then reading the event data from the QM_CTR register. In ABMC mode, the event data of a specific counter ID is read by setting the following fields: QM_EVTSEL.ExtendedEvtID = 1, QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting QM_EVTSEL.RMID to the desired counter ID. Reading the QM_CTR then returns the contents of the specified counter ID. RMID_VAL_ERROR bit is set if the counter configuration is invalid, or if an invalid counter ID is set in the QM_EVTSEL.RMID field. RMID_VAL_UNAVAIL bit is set if the counter data is unavailable. Introduce resctrl_arch_reset_cntr() and resctrl_arch_cntr_read() to reset and read event data for a specific counter. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/internal.h | 6 +++ arch/x86/kernel/cpu/resctrl/monitor.c | 69 ++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 63166be49ea6..93eb9ccc7c53 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -43,6 +43,12 @@ struct arch_mbm_state { /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */ #define ABMC_ENABLE_BIT 0 +/* + * Qos Event Identifiers. + */ +#define ABMC_EXTENDED_EVT_ID BIT(31) +#define ABMC_EVT_ID BIT(0) + /** * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share * a resource for a control function diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 51fcd99453e7..8749e26d5a98 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -258,6 +258,75 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, return 0; } +static int __cntr_id_read(u32 cntr_id, u64 *val) +{ + u64 msr_val; + + /* + * QM_EVTSEL Register definition: + * ======================================================= + * Bits Mnemonic Description + * ======================================================= + * 63:44 -- Reserved + * 43:32 RMID RMID or counter ID in ABMC mode + * when reading an MBM event + * 31 ExtendedEvtID Extended Event Identifier + * 30:8 -- Reserved + * 7:0 EvtID Event Identifier + * ======================================================= + * The contents of a specific counter can be read by setting the + * following fields in QM_EVTSEL.ExtendedEvtID(=1) and + * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting QM_EVTSEL.RMID + * to the desired counter ID. Reading the QM_CTR then returns the + * contents of the specified counter. The RMID_VAL_ERROR bit is set + * if the counter configuration is invalid, or if an invalid counter + * ID is set in the QM_EVTSEL.RMID field. The RMID_VAL_UNAVAIL bit + * is set if the counter data is unavailable. + */ + wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid) +{ + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct arch_mbm_state *am; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __cntr_id_read(cntr_id, &am->prev_msr); + } +} + +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + int ret; + + ret = __cntr_id_read(cntr_id, &msr_val); + if (ret) + return ret; + + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + + return 0; +} + /* * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1 * which indicates that RMIDs are configured in legacy mode. -- Gitee From d2686501766d01a13b690fa9f4bb6c93d987711d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:22 -0500 Subject: [PATCH 23/35] fs/resctrl: Support counter read/reset with mbm_event assignment mode ANBZ: #20932 commit 159f36cd4de7718779fd0b232de5137b4ffd2d1e upstream. When "mbm_event" counter assignment mode is enabled, the architecture requires a counter ID to read the event data. Introduce an is_mbm_cntr field in struct rmid_read to indicate whether counter assignment mode is in use. Update the logic to call resctrl_arch_cntr_read() and resctrl_arch_reset_cntr() when the assignment mode is active. Report 'Unassigned' in case the user attempts to read an event without assigning a hardware counter. Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 6 ++++ fs/resctrl/ctrlmondata.c | 22 ++++++++++--- fs/resctrl/internal.h | 3 ++ fs/resctrl/monitor.c | 47 ++++++++++++++++++++------- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 5a32ed287ad4..75ce882e1604 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -434,6 +434,12 @@ When monitoring is enabled all MON groups will also contain: for the L3 cache they occupy). These are named "mon_sub_L3_YY" where "YY" is the node number. + When the 'mbm_event' counter assignment mode is enabled, reading + an MBM event of a MON group returns 'Unassigned' if no hardware + counter is assigned to it. For CTRL_MON groups, 'Unassigned' is + returned if the MBM event does not have an assigned counter in the + CTRL_MON group nor in any of its associated MON groups. + "mon_hw_id": Available only with debug option. The identifier used by hardware for the monitor group. On x86 this is the RMID. diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 42b281b3852f..0d0ef54fc4de 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -563,10 +563,15 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, rr->r = r; rr->d = d; rr->first = first; - rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); - if (IS_ERR(rr->arch_mon_ctx)) { - rr->err = -EINVAL; - return; + if (resctrl_arch_mbm_cntr_assign_enabled(r) && + resctrl_is_mbm_event(evtid)) { + rr->is_mbm_cntr = true; + } else { + rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); + if (IS_ERR(rr->arch_mon_ctx)) { + rr->err = -EINVAL; + return; + } } cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); @@ -582,7 +587,8 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); - resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); + if (rr->arch_mon_ctx) + resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); } int rdtgroup_mondata_show(struct seq_file *m, void *arg) @@ -653,10 +659,16 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) checkresult: + /* + * -ENOENT is a special case, set only when "mbm_event" counter assignment + * mode is enabled and no counter has been assigned. + */ if (rr.err == -EIO) seq_puts(m, "Error\n"); else if (rr.err == -EINVAL) seq_puts(m, "Unavailable\n"); + else if (rr.err == -ENOENT) + seq_puts(m, "Unassigned\n"); else seq_printf(m, "%llu\n", rr.val); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index c6b66d4a6a37..2f1f2efe2f40 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -111,6 +111,8 @@ struct mon_data { * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @is_mbm_cntr: true if "mbm_event" counter assignment mode is enabled and it + * is an MBM event. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -125,6 +127,7 @@ struct rmid_read { enum resctrl_event_id evtid; bool first; struct cacheinfo *ci; + bool is_mbm_cntr; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index c815153cad07..55327056596e 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -419,12 +419,24 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; struct rdt_mon_domain *d; + int cntr_id = -ENOENT; struct mbm_state *m; int err, ret; u64 tval = 0; + if (rr->is_mbm_cntr) { + cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid); + if (cntr_id < 0) { + rr->err = -ENOENT; + return -EINVAL; + } + } + if (rr->first) { - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); + if (rr->is_mbm_cntr) + resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid); + else + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); m = get_mbm_state(rr->d, closid, rmid, rr->evtid); if (m) memset(m, 0, sizeof(struct mbm_state)); @@ -435,8 +447,12 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) /* Reading a single domain, must be on a CPU in that domain. */ if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask)) return -EINVAL; - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); + if (rr->is_mbm_cntr) + rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id, + rr->evtid, &tval); + else + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); if (rr->err) return rr->err; @@ -460,8 +476,12 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { if (d->ci_id != rr->ci->id) continue; - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); + if (rr->is_mbm_cntr) + err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id, + rr->evtid, &tval); + else + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); if (!err) { rr->val += tval; ret = 0; @@ -668,11 +688,15 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain * rr.r = r; rr.d = d; rr.evtid = evtid; - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); - if (IS_ERR(rr.arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(rr.arch_mon_ctx)); - return; + if (resctrl_arch_mbm_cntr_assign_enabled(r)) { + rr.is_mbm_cntr = true; + } else { + rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); + if (IS_ERR(rr.arch_mon_ctx)) { + pr_warn_ratelimited("Failed to allocate monitor context: %ld", + PTR_ERR(rr.arch_mon_ctx)); + return; + } } __mon_event_count(rdtgrp, &rr); @@ -684,7 +708,8 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain * if (is_mba_sc(NULL)) mbm_bw_count(rdtgrp, &rr); - resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); + if (rr.arch_mon_ctx) + resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); } static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, -- Gitee From 46140e3ae5d34eb07761906bbd5062931b45a6b9 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:23 -0500 Subject: [PATCH 24/35] fs/resctrl: Add event configuration directory under info/L3_MON/ ANBZ: #20932 commit ea274cbeaf8f0667267b347e3f84797439cdab4e upstream. The "mbm_event" counter assignment mode allows the user to assign a hardware counter to an RMID, event pair and monitor the bandwidth as long as it is assigned. The user can specify the memory transaction(s) for the counter to track. When this mode is supported, the /sys/fs/resctrl/info/L3_MON/event_configs directory contains a sub-directory for each MBM event that can be assigned to a counter. The MBM event sub-directory contains a file named "event_filter" that is used to view and modify which memory transactions the MBM event is configured with. Create /sys/fs/resctrl/info/L3_MON/event_configs directory on resctrl mount and pre-populate it with directories for the two existing MBM events: mbm_total_bytes and mbm_local_bytes. Create the "event_filter" file within each MBM event directory with the needed *show() that displays the memory transactions with which the MBM event is configured. Example: $ mount -t resctrl resctrl /sys/fs/resctrl $ cd /sys/fs/resctrl/ $ cat info/L3_MON/event_configs/mbm_total_bytes/event_filter local_reads,remote_reads,local_non_temporal_writes, remote_non_temporal_writes,local_reads_slow_memory, remote_reads_slow_memory,dirty_victim_writes_all $ cat info/L3_MON/event_configs/mbm_local_bytes/event_filter local_reads,local_non_temporal_writes,local_reads_slow_memory Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 33 +++++++++++++++ fs/resctrl/internal.h | 4 ++ fs/resctrl/monitor.c | 56 +++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 59 ++++++++++++++++++++++++++- include/linux/resctrl_types.h | 3 ++ 5 files changed, 153 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 75ce882e1604..4e1254d4b2d5 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -310,6 +310,39 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs 0=30;1=30 +"event_configs": + Directory that exists when "mbm_event" counter assignment mode is supported. + Contains a sub-directory for each MBM event that can be assigned to a counter. + + Two MBM events are supported by default: mbm_local_bytes and mbm_total_bytes. + Each MBM event's sub-directory contains a file named "event_filter" that is + used to view and modify which memory transactions the MBM event is configured + with. The file is accessible only when "mbm_event" counter assignment mode is + enabled. + + List of memory transaction types supported: + + ========================== ======================================================== + Name Description + ========================== ======================================================== + dirty_victim_writes_all Dirty Victims from the QOS domain to all types of memory + remote_reads_slow_memory Reads to slow memory in the non-local NUMA domain + local_reads_slow_memory Reads to slow memory in the local NUMA domain + remote_non_temporal_writes Non-temporal writes to non-local NUMA domain + local_non_temporal_writes Non-temporal writes to local NUMA domain + remote_reads Reads to memory in the non-local NUMA domain + local_reads Reads to memory in the local NUMA domain + ========================== ======================================================== + + For example:: + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + local_reads,remote_reads,local_non_temporal_writes,remote_non_temporal_writes, + local_reads_slow_memory,remote_reads_slow_memory,dirty_victim_writes_all + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + local_reads,local_non_temporal_writes,local_reads_slow_memory + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 2f1f2efe2f40..9bf2e2fd5c19 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -241,6 +241,8 @@ struct rdtgroup { #define RFTYPE_DEBUG BIT(10) +#define RFTYPE_ASSIGN_CONFIG BIT(11) + #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) @@ -403,6 +405,8 @@ void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp); void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp); +int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 55327056596e..7179f9865a48 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -972,6 +972,61 @@ u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid) return mon_event_all[evtid].evt_cfg; } +/** + * struct mbm_transaction - Memory transaction an MBM event can be configured with. + * @name: Name of memory transaction (read, write ...). + * @val: The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to + * represent the memory transaction within an event's configuration. + */ +struct mbm_transaction { + char name[32]; + u32 val; +}; + +/* Decoded values for each type of memory transaction. */ +static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = { + {"local_reads", READS_TO_LOCAL_MEM}, + {"remote_reads", READS_TO_REMOTE_MEM}, + {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM}, + {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM}, + {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM}, + {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM}, + {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM}, +}; + +int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) +{ + struct mon_evt *mevt = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r; + bool sep = false; + int ret = 0, i; + + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + r = resctrl_arch_get_resource(mevt->rid); + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) { + if (mevt->evt_cfg & mbm_transactions[i].val) { + if (sep) + seq_putc(seq, ','); + seq_printf(seq, "%s", mbm_transactions[i].name); + sep = true; + } + } + seq_putc(seq, '\n'); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} + /* * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID * pair in the domain. @@ -1287,6 +1342,7 @@ int resctrl_mon_resource_init(void) RFTYPE_MON_INFO | RFTYPE_RES_CACHE); resctrl_file_fflags_init("available_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG); } return 0; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 34a95f6f622d..01d7869ec1ee 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1923,6 +1923,12 @@ static struct rftype res_common_files[] = { .seq_show = mbm_local_bytes_config_show, .write = mbm_local_bytes_config_write, }, + { + .name = "event_filter", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = event_filter_show, + }, { .name = "mbm_assign_mode", .mode = 0444, @@ -2183,10 +2189,48 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, return ret; } +static int resctrl_mkdir_event_configs(struct rdt_resource *r, struct kernfs_node *l3_mon_kn) +{ + struct kernfs_node *kn_subdir, *kn_subdir2; + struct mon_evt *mevt; + int ret; + + kn_subdir = kernfs_create_dir(l3_mon_kn, "event_configs", l3_mon_kn->mode, NULL); + if (IS_ERR(kn_subdir)) + return PTR_ERR(kn_subdir); + + ret = rdtgroup_kn_set_ugid(kn_subdir); + if (ret) + return ret; + + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid)) + continue; + + kn_subdir2 = kernfs_create_dir(kn_subdir, mevt->name, kn_subdir->mode, mevt); + if (IS_ERR(kn_subdir2)) { + ret = PTR_ERR(kn_subdir2); + goto out; + } + + ret = rdtgroup_kn_set_ugid(kn_subdir2); + if (ret) + goto out; + + ret = rdtgroup_add_files(kn_subdir2, RFTYPE_ASSIGN_CONFIG); + if (ret) + break; + } + +out: + return ret; +} + static int rdtgroup_mkdir_info_resdir(void *priv, char *name, unsigned long fflags) { struct kernfs_node *kn_subdir; + struct rdt_resource *r; int ret; kn_subdir = kernfs_create_dir(kn_info, name, @@ -2199,8 +2243,19 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, return ret; ret = rdtgroup_add_files(kn_subdir, fflags); - if (!ret) - kernfs_activate(kn_subdir); + if (ret) + return ret; + + if ((fflags & RFTYPE_MON_INFO) == RFTYPE_MON_INFO) { + r = priv; + if (r->mon.mbm_cntr_assignable) { + ret = resctrl_mkdir_event_configs(r, kn_subdir); + if (ret) + return ret; + } + } + + kernfs_activate(kn_subdir); return ret; } diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index d98351663c2c..acfe07860b34 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,6 +34,9 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) +/* Number of memory transactions that an MBM event can be configured with */ +#define NUM_MBM_TRANSACTIONS 7 + /* Event IDs */ enum resctrl_event_id { /* Must match value of first event below */ -- Gitee From fe0ab49b9966d9f17720da3b88a4ae963dc64a57 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:24 -0500 Subject: [PATCH 25/35] fs/resctrl: Provide interface to update the event configurations ANBZ: #20932 commit f9ae5913d47cda67481a4f54cc3273d3d1d00a01 upstream. When "mbm_event" counter assignment mode is enabled, users can modify the event configuration by writing to the 'event_filter' resctrl file. The event configurations for mbm_event mode are located in /sys/fs/resctrl/info/L3_MON/event_configs/. Update the assignments of all CTRL_MON and MON resource groups when the event configuration is modified. Example: $ mount -t resctrl resctrl /sys/fs/resctrl $ cd /sys/fs/resctrl/ $ cat info/L3_MON/event_configs/mbm_local_bytes/event_filter local_reads,local_non_temporal_writes,local_reads_slow_memory $ echo "local_reads,local_non_temporal_writes" > info/L3_MON/event_configs/mbm_total_bytes/event_filter $ cat info/L3_MON/event_configs/mbm_total_bytes/event_filter local_reads,local_non_temporal_writes Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 12 +++ fs/resctrl/internal.h | 3 + fs/resctrl/monitor.c | 114 ++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 3 +- 4 files changed, 131 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 4e1254d4b2d5..3df1d0bef06e 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -343,6 +343,18 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter local_reads,local_non_temporal_writes,local_reads_slow_memory + Modify the event configuration by writing to the "event_filter" file within + the "event_configs" directory. The read/write "event_filter" file contains the + configuration of the event that reflects which memory transactions are counted by it. + + For example:: + + # echo "local_reads, local_non_temporal_writes" > + /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + local_reads,local_non_temporal_writes + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9bf2e2fd5c19..90d3e4ab335b 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -407,6 +407,9 @@ void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp); int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); +ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 7179f9865a48..ccb9726bba54 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1192,6 +1192,120 @@ void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp) &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]); } +static int resctrl_parse_mem_transactions(char *tok, u32 *val) +{ + u32 temp_val = 0; + char *evt_str; + bool found; + int i; + +next_config: + if (!tok || tok[0] == '\0') { + *val = temp_val; + return 0; + } + + /* Start processing the strings for each memory transaction type */ + evt_str = strim(strsep(&tok, ",")); + found = false; + for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) { + if (!strcmp(mbm_transactions[i].name, evt_str)) { + temp_val |= mbm_transactions[i].val; + found = true; + break; + } + } + + if (!found) { + rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str); + return -EINVAL; + } + + goto next_config; +} + +/* + * rdtgroup_update_cntr_event - Update the counter assignments for the event + * in a group. + * @r: Resource to which update needs to be done. + * @rdtgrp: Resctrl group. + * @evtid: MBM monitor event. + */ +static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp, + enum resctrl_event_id evtid) +{ + struct rdt_mon_domain *d; + int cntr_id; + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid); + if (cntr_id >= 0) + rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid, + rdtgrp->closid, cntr_id, true); + } +} + +/* + * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event + * for all the groups. + * @mevt MBM Monitor event. + */ +static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt) +{ + struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); + struct rdtgroup *prgrp, *crgrp; + + /* + * Find all the groups where the event is assigned and update the + * configuration of existing assignments. + */ + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { + rdtgroup_update_cntr_event(r, prgrp, mevt->evtid); + + list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) + rdtgroup_update_cntr_event(r, crgrp, mevt->evtid); + } +} + +ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off) +{ + struct mon_evt *mevt = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r; + u32 evt_cfg = 0; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + r = resctrl_arch_get_resource(mevt->rid); + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + ret = resctrl_parse_mem_transactions(buf, &evt_cfg); + if (!ret && mevt->evt_cfg != evt_cfg) { + mevt->evt_cfg = evt_cfg; + resctrl_update_cntr_allrdtgrp(mevt); + } + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} + int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 01d7869ec1ee..8976e8458abc 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1925,9 +1925,10 @@ static struct rftype res_common_files[] = { }, { .name = "event_filter", - .mode = 0444, + .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = event_filter_show, + .write = event_filter_write, }, { .name = "mbm_assign_mode", -- Gitee From ab0d49d8c7c3f280ade456504958a4b486fee2ee Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:25 -0500 Subject: [PATCH 26/35] fs/resctrl: Introduce mbm_assign_on_mkdir to enable assignments on mkdir ANBZ: #20932 commit ac1df9bb0ba3ae94137fb494cd9efc598f65d826 upstream. The "mbm_event" counter assignment mode allows users to assign a hardware counter to an RMID, event pair and monitor the bandwidth as long as it is assigned. Introduce a user-configurable option that determines if a counter will automatically be assigned to an RMID, event pair when its associated monitor group is created via mkdir. Accessible when "mbm_event" counter assignment mode is enabled. Suggested-by: Peter Newman Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 20 ++++++++++ fs/resctrl/internal.h | 6 +++ fs/resctrl/monitor.c | 53 +++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 7 ++++ include/linux/resctrl.h | 3 ++ 5 files changed, 89 insertions(+) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 3df1d0bef06e..f93b9c0d8fbc 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -355,6 +355,26 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter local_reads,local_non_temporal_writes +"mbm_assign_on_mkdir": + Exists when "mbm_event" counter assignment mode is supported. Accessible + only when "mbm_event" counter assignment mode is enabled. + + Determines if a counter will automatically be assigned to an RMID, MBM event + pair when its associated monitor group is created via mkdir. Enabled by default + on boot, also when switched from "default" mode to "mbm_event" counter assignment + mode. Users can disable this capability by writing to the interface. + + "0": + Auto assignment is disabled. + "1": + Auto assignment is enabled. + + Example:: + + # echo 0 > /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir + 0 + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 90d3e4ab335b..66c677c1b858 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -410,6 +410,12 @@ int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); +int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, + struct seq_file *s, void *v); + +ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index ccb9726bba54..deca9535fbbb 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1027,6 +1027,57 @@ int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v return ret; } +int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s, + void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + int ret = 0; + + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} + +ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + bool value; + int ret; + + ret = kstrtobool(buf, &value); + if (ret) + return ret; + + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + r->mon.mbm_assign_on_mkdir = value; + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret ?: nbytes; +} + /* * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID * pair in the domain. @@ -1457,6 +1508,8 @@ int resctrl_mon_resource_init(void) resctrl_file_fflags_init("available_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG); + resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO | + RFTYPE_RES_CACHE); } return 0; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 8976e8458abc..58b30573a529 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1808,6 +1808,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_last_cmd_status_show, .fflags = RFTYPE_TOP_INFO, }, + { + .name = "mbm_assign_on_mkdir", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_mbm_assign_on_mkdir_show, + .write = resctrl_mbm_assign_on_mkdir_write, + }, { .name = "num_closids", .mode = 0444, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 04152654827d..a7d92718b653 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -277,12 +277,15 @@ enum resctrl_schema_fmt { * monitoring events can be configured. * @num_mbm_cntrs: Number of assignable counters. * @mbm_cntr_assignable:Is system capable of supporting counter assignment? + * @mbm_assign_on_mkdir:True if counters should automatically be assigned to MBM + * events of monitor groups created via mkdir. */ struct resctrl_mon { int num_rmid; unsigned int mbm_cfg_mask; int num_mbm_cntrs; bool mbm_cntr_assignable; + bool mbm_assign_on_mkdir; }; /** -- Gitee From bbeff4bd5a5842451f9cf46eec0e1cb88111b56b Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:26 -0500 Subject: [PATCH 27/35] fs/resctrl: Auto assign counters on mkdir and clean up on group removal ANBZ: #20932 commit ef712fe97ec575657abb12d76837867dd8b8a0ed upstream. Resctrl provides a user-configurable option mbm_assign_on_mkdir that determines if a counter will automatically be assigned to an RMID, event pair when its associated monitor group is created via mkdir. Enable mbm_assign_on_mkdir by default to automatically assign counters to the two default events (MBM total and MBM local) of a new monitoring group created via mkdir. This maintains backward compatibility with original resctrl support for these two events. Unassign and free counters belonging to a monitoring group when the group is deleted. Monitor group creation does not fail if a counter cannot be assigned to one or both events. There may be limited counters and users have the flexibility to modify counter assignments at a later time. Log the error message "Failed to allocate counter for in domain " in /sys/fs/resctrl/info/last_cmd_status when a new monitoring group is created but counter assignment failed. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/monitor.c | 4 +++- fs/resctrl/rdtgroup.c | 22 ++++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index deca9535fbbb..9cb334136d21 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1231,7 +1231,8 @@ void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r)) + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) || + !r->mon.mbm_assign_on_mkdir) return; if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) @@ -1503,6 +1504,7 @@ int resctrl_mon_resource_init(void) (READS_TO_LOCAL_MEM | READS_TO_LOCAL_S_MEM | NON_TEMP_WRITE_TO_LOCAL_MEM); + r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); resctrl_file_fflags_init("available_mbm_cntrs", diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 58b30573a529..bf9bf5bb758b 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -2713,6 +2713,8 @@ static int rdt_get_tree(struct fs_context *fc) if (ret < 0) goto out_info; + rdtgroup_assign_cntrs(&rdtgroup_default); + ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) @@ -2751,8 +2753,10 @@ static int rdt_get_tree(struct fs_context *fc) if (resctrl_arch_mon_capable()) kernfs_remove(kn_mondata); out_mongrp: - if (resctrl_arch_mon_capable()) + if (resctrl_arch_mon_capable()) { + rdtgroup_unassign_cntrs(&rdtgroup_default); kernfs_remove(kn_mongrp); + } out_info: kernfs_remove(kn_info); out_closid_exit: @@ -2897,6 +2901,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) head = &rdtgrp->mon.crdtgrp_list; list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { + rdtgroup_unassign_cntrs(sentry); free_rmid(sentry->closid, sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); @@ -2937,6 +2942,8 @@ static void rmdir_all_sub(void) cpumask_or(&rdtgroup_default.cpu_mask, &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); + rdtgroup_unassign_cntrs(rdtgrp); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); kernfs_remove(rdtgrp->kn); @@ -3021,6 +3028,7 @@ static void resctrl_fs_teardown(void) return; rmdir_all_sub(); + rdtgroup_unassign_cntrs(&rdtgroup_default); mon_put_kn_priv(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; @@ -3502,9 +3510,12 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) } rdtgrp->mon.rmid = ret; + rdtgroup_assign_cntrs(rdtgrp); + ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); + rdtgroup_unassign_cntrs(rdtgrp); free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); return ret; } @@ -3514,8 +3525,10 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) { - if (resctrl_arch_mon_capable()) + if (resctrl_arch_mon_capable()) { + rdtgroup_unassign_cntrs(rgrp); free_rmid(rgrp->closid, rgrp->mon.rmid); + } } /* @@ -3791,6 +3804,9 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) update_closid_rmid(tmpmask, NULL); rdtgrp->flags = RDT_DELETED; + + rdtgroup_unassign_cntrs(rdtgrp); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); /* @@ -3838,6 +3854,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); update_closid_rmid(tmpmask, NULL); + rdtgroup_unassign_cntrs(rdtgrp); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); closid_free(rdtgrp->closid); -- Gitee From 41efe7cc7b05a8af2b49469c43f61e783e8e1d7c Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:27 -0500 Subject: [PATCH 28/35] fs/resctrl: Introduce mbm_L3_assignments to list assignments in a group ANBZ: #20932 commit cba8222880b88d96f3ed6c8a115e335e552b83a1 upstream. Introduce the mbm_L3_assignments resctrl file associated with CTRL_MON and MON resource groups to display the counter assignment states of the resource group when "mbm_event" counter assignment mode is enabled. Display the list in the following format: :=;= Event: A valid MBM event listed in /sys/fs/resctrl/info/L3_MON/event_configs directory. Domain ID: A valid domain ID. The assignment state can be one of the following: _ : No counter assigned. e : Counter assigned exclusively. Example: To list the assignment states for the default group $ cd /sys/fs/resctrl $ cat /sys/fs/resctrl/mbm_L3_assignments mbm_total_bytes:0=e;1=e mbm_local_bytes:0=e;1=e Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 31 +++++++++++++++++ fs/resctrl/internal.h | 2 ++ fs/resctrl/monitor.c | 49 +++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 6 ++++ 4 files changed, 88 insertions(+) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index f93b9c0d8fbc..7f5cbd7de8bf 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -509,6 +509,37 @@ When monitoring is enabled all MON groups will also contain: Available only with debug option. The identifier used by hardware for the monitor group. On x86 this is the RMID. +When monitoring is enabled all MON groups may also contain: + +"mbm_L3_assignments": + Exists when "mbm_event" counter assignment mode is supported and lists the + counter assignment states of the group. + + The assignment list is displayed in the following format: + + :=;= + + Event: A valid MBM event in the + /sys/fs/resctrl/info/L3_MON/event_configs directory. + + Domain ID: A valid domain ID. + + Assignment states: + + _ : No counter assigned. + + e : Counter assigned exclusively. + + Example: + + To display the counter assignment states for the default group. + :: + + # cd /sys/fs/resctrl + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + When the "mba_MBps" mount option is used all CTRL_MON groups will also contain: "mba_MBps_event": diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 66c677c1b858..43297b36f5dd 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -416,6 +416,8 @@ int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); +int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 9cb334136d21..b692a0ef1710 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1454,6 +1454,54 @@ int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, return ret; } +int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_mon_domain *d; + struct rdtgroup *rdtgrp; + struct mon_evt *mevt; + int ret = 0; + bool sep; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out_unlock; + } + + rdt_last_cmd_clear(); + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid)) + continue; + + sep = false; + seq_printf(s, "%s:", mevt->name); + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (sep) + seq_putc(s, ';'); + + if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0) + seq_printf(s, "%d=_", d->hdr.id); + else + seq_printf(s, "%d=e", d->hdr.id); + + sep = true; + } + seq_putc(s, '\n'); + } + +out_unlock: + rdtgroup_kn_unlock(of->kn); + + return ret; +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * @@ -1512,6 +1560,7 @@ int resctrl_mon_resource_init(void) resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG); resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE); } return 0; diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index bf9bf5bb758b..4458d6b71850 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1937,6 +1937,12 @@ static struct rftype res_common_files[] = { .seq_show = event_filter_show, .write = event_filter_write, }, + { + .name = "mbm_L3_assignments", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_L3_assignments_show, + }, { .name = "mbm_assign_mode", .mode = 0444, -- Gitee From 2adccf4c6900cb3f8c92766e01f26309ef7e5668 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:28 -0500 Subject: [PATCH 29/35] fs/resctrl: Introduce the interface to modify assignments in a group ANBZ: #20932 commit 88bee79640aea6192f98c8faae30e0013453479d upstream. Enable the mbm_l3_assignments resctrl file to be used to modify counter assignments of CTRL_MON and MON groups when the "mbm_event" counter assignment mode is enabled. Process the assignment modifications in the following format: :=;= Event: A valid MBM event in the /sys/fs/resctrl/info/L3_MON/event_configs directory. Domain ID: A valid domain ID. When writing, '*' applies the changes to all domains. Assignment states: _ : Unassign a counter. e : Assign a counter exclusively. Examples: $ cd /sys/fs/resctrl $ cat /sys/fs/resctrl/mbm_L3_assignments mbm_total_bytes:0=e;1=e mbm_local_bytes:0=e;1=e To unassign the counter associated with the mbm_total_bytes event on domain 0: $ echo "mbm_total_bytes:0=_" > mbm_L3_assignments $ cat /sys/fs/resctrl/mbm_L3_assignments mbm_total_bytes:0=_;1=e mbm_local_bytes:0=e;1=e To unassign the counter associated with the mbm_total_bytes event on all the domains: $ echo "mbm_total_bytes:*=_" > mbm_L3_assignments $ cat /sys/fs/resctrl/mbm_L3_assignments mbm_total_bytes:0=_;1=_ mbm_local_bytes:0=e;1=e Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 151 +++++++++++++++++++++++++- fs/resctrl/internal.h | 3 + fs/resctrl/monitor.c | 139 ++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 3 +- 4 files changed, 294 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 7f5cbd7de8bf..d081bd70b935 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -522,7 +522,8 @@ When monitoring is enabled all MON groups may also contain: Event: A valid MBM event in the /sys/fs/resctrl/info/L3_MON/event_configs directory. - Domain ID: A valid domain ID. + Domain ID: A valid domain ID. When writing, '*' applies the changes + to all the domains. Assignment states: @@ -540,6 +541,35 @@ When monitoring is enabled all MON groups may also contain: mbm_total_bytes:0=e;1=e mbm_local_bytes:0=e;1=e + Assignments can be modified by writing to the interface. + + Examples: + + To unassign the counter associated with the mbm_total_bytes event on domain 0: + :: + + # echo "mbm_total_bytes:0=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=_;1=e + mbm_local_bytes:0=e;1=e + + To unassign the counter associated with the mbm_total_bytes event on all the domains: + :: + + # echo "mbm_total_bytes:*=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=_;1=_ + mbm_local_bytes:0=e;1=e + + To assign a counter associated with the mbm_total_bytes event on all domains in + exclusive mode: + :: + + # echo "mbm_total_bytes:*=e" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + When the "mba_MBps" mount option is used all CTRL_MON groups will also contain: "mba_MBps_event": @@ -1585,6 +1615,125 @@ View the llc occupancy snapshot:: # cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy 11234000 + +Examples on working with mbm_assign_mode +======================================== + +a. Check if MBM counter assignment mode is supported. +:: + + # mount -t resctrl resctrl /sys/fs/resctrl/ + + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + [mbm_event] + default + +The "mbm_event" mode is detected and enabled. + +b. Check how many assignable counters are supported. +:: + + # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs + 0=32;1=32 + +c. Check how many assignable counters are available for assignment in each domain. +:: + + # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs + 0=30;1=30 + +d. To list the default group's assign states. +:: + + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + +e. To unassign the counter associated with the mbm_total_bytes event on domain 0. +:: + + # echo "mbm_total_bytes:0=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=_;1=e + mbm_local_bytes:0=e;1=e + +f. To unassign the counter associated with the mbm_total_bytes event on all domains. +:: + + # echo "mbm_total_bytes:*=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignment + mbm_total_bytes:0=_;1=_ + mbm_local_bytes:0=e;1=e + +g. To assign a counter associated with the mbm_total_bytes event on all domains in +exclusive mode. +:: + + # echo "mbm_total_bytes:*=e" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + +h. Read the events mbm_total_bytes and mbm_local_bytes of the default group. There is +no change in reading the events with the assignment. +:: + + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_total_bytes + 779247936 + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_total_bytes + 562324232 + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes + 212122123 + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes + 121212144 + +i. Check the event configurations. +:: + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + local_reads,remote_reads,local_non_temporal_writes,remote_non_temporal_writes, + local_reads_slow_memory,remote_reads_slow_memory,dirty_victim_writes_all + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + local_reads,local_non_temporal_writes,local_reads_slow_memory + +j. Change the event configuration for mbm_local_bytes. +:: + + # echo "local_reads, local_non_temporal_writes, local_reads_slow_memory, remote_reads" > + /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + local_reads,local_non_temporal_writes,local_reads_slow_memory,remote_reads + +k. Now read the local events again. The first read may come back with "Unavailable" +status. The subsequent read of mbm_local_bytes will display the current value. +:: + + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes + Unavailable + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes + 2252323 + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes + Unavailable + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes + 1566565 + +l. Users have the option to go back to 'default' mbm_assign_mode if required. This can be +done using the following command. Note that switching the mbm_assign_mode may reset all +the MBM counters (and thus all MBM events) of all the resctrl groups. +:: + + # echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + mbm_event + [default] + +m. Unmount the resctrl filesystem. +:: + + # umount /sys/fs/resctrl/ + Intel RDT Errata ================ diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 43297b36f5dd..c69b1da80d3f 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -418,6 +418,9 @@ ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v); +ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index b692a0ef1710..f388dbcdbdcd 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1502,6 +1502,145 @@ int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, voi return ret; } +/* + * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching + * event name. + */ +static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name) +{ + struct mon_evt *mevt; + + for_each_mon_event(mevt) { + if (mevt->rid == r->rid && mevt->enabled && + resctrl_is_mbm_event(mevt->evtid) && + !strcmp(mevt->name, name)) + return mevt; + } + + return NULL; +} + +static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, struct mon_evt *mevt) +{ + int ret = 0; + + if (!assign || strlen(assign) != 1) + return -EINVAL; + + switch (*assign) { + case 'e': + ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt); + break; + case '_': + rdtgroup_unassign_cntr_event(d, rdtgrp, mevt); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp, + char *event, char *tok) +{ + struct rdt_mon_domain *d; + unsigned long dom_id = 0; + char *dom_str, *id_str; + struct mon_evt *mevt; + int ret; + + mevt = mbm_get_mon_event_by_name(r, event); + if (!mevt) { + rdt_last_cmd_printf("Invalid event %s\n", event); + return -ENOENT; + } + +next: + if (!tok || tok[0] == '\0') + return 0; + + /* Start processing the strings for each domain */ + dom_str = strim(strsep(&tok, ";")); + + id_str = strsep(&dom_str, "="); + + /* Check for domain id '*' which means all domains */ + if (id_str && *id_str == '*') { + ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt); + if (ret) + rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n", + event, dom_str); + return ret; + } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) { + rdt_last_cmd_puts("Missing domain id\n"); + return -EINVAL; + } + + /* Verify if the dom_id is valid */ + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->hdr.id == dom_id) { + ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt); + if (ret) { + rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n", + event, dom_id, dom_str); + return ret; + } + goto next; + } + } + + rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id); + return -EINVAL; +} + +ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdtgroup *rdtgrp; + char *token, *event; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event mode is not enabled\n"); + rdtgroup_kn_unlock(of->kn); + return -EINVAL; + } + + while ((token = strsep(&buf, "\n")) != NULL) { + /* + * The write command follows the following format: + * ":=" + * Extract the event name first. + */ + event = strsep(&token, ":"); + + ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token); + if (ret) + break; + } + + rdtgroup_kn_unlock(of->kn); + + return ret ?: nbytes; +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 4458d6b71850..21d14cb01700 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1939,9 +1939,10 @@ static struct rftype res_common_files[] = { }, { .name = "mbm_L3_assignments", - .mode = 0444, + .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = mbm_L3_assignments_show, + .write = mbm_L3_assignments_write, }, { .name = "mbm_assign_mode", -- Gitee From 34709a319441fd95882f7615a1f2590d64f16f3e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:29 -0500 Subject: [PATCH 30/35] fs/resctrl: Disable BMEC event configuration when mbm_event mode is enabled ANBZ: #20932 commit 9f0209b857d2fc99c2a32bff1d7bfd54b314c29c upstream. The BMEC (Bandwidth Monitoring Event Configuration) feature enables per-domain event configuration. With BMEC the MBM events are configured using the mbm_total_bytes_config or mbm_local_bytes_config files in /sys/fs/resctrl/info/L3_MON/ and the per-domain event configuration affects all monitor resource groups. The mbm_event counter assignment mode enables counters to be assigned to RMID (i.e. a monitor resource group), event pairs, with potentially unique event configurations associated with every counter. There may be systems that support both BMEC and mbm_event counter assignment mode, but resctrl supporting both concurrently will present a conflicting interface to the user with both per-domain and per RMID, event configurations active at the same time. The mbm_event counter assignment provides most flexibility to user space and aligns with Arm's counter support. On systems that support both, disable BMEC event configuration when mbm_event mode is enabled by hiding the mbm_total_bytes_config or mbm_local_bytes_config files when mbm_event mode is enabled. Ensure mon_features always displays accurate information about monitor features. Suggested-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- fs/resctrl/rdtgroup.c | 47 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 21d14cb01700..075a37f79d70 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1150,7 +1150,8 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, if (mevt->rid != r->rid || !mevt->enabled) continue; seq_printf(seq, "%s\n", mevt->name); - if (mevt->configurable) + if (mevt->configurable && + !resctrl_arch_mbm_cntr_assign_enabled(r)) seq_printf(seq, "%s_config\n", mevt->name); } @@ -1799,6 +1800,44 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, return ret ?: nbytes; } +/* + * resctrl_bmec_files_show() — Controls the visibility of BMEC-related resctrl + * files. When @show is true, the files are displayed; when false, the files + * are hidden. + * Don't treat kernfs_find_and_get failure as an error, since this function may + * be called regardless of whether BMEC is supported or the event is enabled. + */ +static void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn, + bool show) +{ + struct kernfs_node *kn_config, *mon_kn = NULL; + char name[32]; + + if (!l3_mon_kn) { + sprintf(name, "%s_MON", r->name); + mon_kn = kernfs_find_and_get(kn_info, name); + if (!mon_kn) + return; + l3_mon_kn = mon_kn; + } + + kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_total_bytes_config"); + if (kn_config) { + kernfs_show(kn_config, show); + kernfs_put(kn_config); + } + + kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_local_bytes_config"); + if (kn_config) { + kernfs_show(kn_config, show); + kernfs_put(kn_config); + } + + /* Release the reference only if it was acquired */ + if (mon_kn) + kernfs_put(mon_kn); +} + /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { @@ -2267,6 +2306,12 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, ret = resctrl_mkdir_event_configs(r, kn_subdir); if (ret) return ret; + /* + * Hide BMEC related files if mbm_event mode + * is enabled. + */ + if (resctrl_arch_mbm_cntr_assign_enabled(r)) + resctrl_bmec_files_show(r, kn_subdir, false); } } -- Gitee From 70bd125370c4803107c32b1241378de4a0b2cfdb Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:30 -0500 Subject: [PATCH 31/35] fs/resctrl: Introduce the interface to switch between monitor modes ANBZ: #20932 commit 8004ea01cf6338298e0c6ab055bc3ec659ce381b upstream. Resctrl subsystem can support two monitoring modes, "mbm_event" or "default". In mbm_event mode, monitoring event can only accumulate data while it is backed by a hardware counter. In "default" mode, resctrl assumes there is a hardware counter for each event within every CTRL_MON and MON group. Introduce mbm_assign_mode resctrl file to switch between mbm_event and default modes. Example: To list the MBM monitor modes supported: $ cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode [mbm_event] default To enable the "mbm_event" counter assignment mode: $ echo "mbm_event" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode To enable the "default" monitoring mode: $ echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode Reset MBM event counters automatically as part of changing the mode. Clear both architectural and non-architectural event states to prevent overflow conditions during the next event read. Clear assignable counter configuration on all the domains. Also, enable auto assignment when switching to "mbm_event" mode. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- Documentation/filesystems/resctrl.rst | 22 +++++- fs/resctrl/internal.h | 6 ++ fs/resctrl/monitor.c | 100 ++++++++++++++++++++++++++ fs/resctrl/rdtgroup.c | 7 +- 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index d081bd70b935..bd5901338f29 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -259,7 +259,8 @@ with the following files: "mbm_assign_mode": The supported counter assignment modes. The enclosed brackets indicate which mode - is enabled. + is enabled. The MBM events associated with counters may reset when "mbm_assign_mode" + is changed. :: # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode @@ -279,6 +280,15 @@ with the following files: of counters available is described in the "num_mbm_cntrs" file. Changing the mode may cause all counters on the resource to reset. + Moving to mbm_event counter assignment mode requires users to assign the counters + to the events. Otherwise, the MBM event counters will return 'Unassigned' when read. + + The mode is beneficial for AMD platforms that support more CTRL_MON + and MON groups than available hardware counters. By default, this + feature is enabled on AMD platforms with the ABMC (Assignable Bandwidth + Monitoring Counters) capability, ensuring counters remain assigned even + when the corresponding RMID is not actively used by any processor. + "default": In default mode, resctrl assumes there is a hardware counter for each @@ -288,6 +298,16 @@ with the following files: result in misleading values or display "Unavailable" if no counter is assigned to the event. + * To enable "mbm_event" counter assignment mode: + :: + + # echo "mbm_event" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + + * To enable "default" monitoring mode: + :: + + # echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + "num_mbm_cntrs": The maximum number of counters (total of available and assigned counters) in each domain when the system supports mbm_event mode. diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index c69b1da80d3f..cf1fd82dc5a9 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -396,6 +396,12 @@ void *rdt_kn_parent_priv(struct kernfs_node *kn); int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v); +ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off); + +void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn, + bool show); + int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v); int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index f388dbcdbdcd..50c24460d992 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1078,6 +1078,33 @@ ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf return ret ?: nbytes; } +/* + * mbm_cntr_free_all() - Clear all the counter ID configuration details in the + * domain @d. Called when mbm_assign_mode is changed. + */ +static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs); +} + +/* + * resctrl_reset_rmid_all() - Reset all non-architecture states for all the + * supported RMIDs. + */ +static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + u32 idx_limit = resctrl_arch_system_num_rmid_idx(); + enum resctrl_event_id evt; + int idx; + + for_each_mbm_event_id(evt) { + if (!resctrl_is_mon_event_enabled(evt)) + continue; + idx = MBM_STATE_IDX(evt); + memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit); + } +} + /* * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID * pair in the domain. @@ -1388,6 +1415,79 @@ int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, return 0; } +ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + struct rdt_mon_domain *d; + int ret = 0; + bool enable; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + if (!strcmp(buf, "default")) { + enable = 0; + } else if (!strcmp(buf, "mbm_event")) { + if (r->mon.mbm_cntr_assignable) { + enable = 1; + } else { + ret = -EINVAL; + rdt_last_cmd_puts("mbm_event mode is not supported\n"); + goto out_unlock; + } + } else { + ret = -EINVAL; + rdt_last_cmd_puts("Unsupported assign mode\n"); + goto out_unlock; + } + + if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) { + ret = resctrl_arch_mbm_cntr_assign_set(r, enable); + if (ret) + goto out_unlock; + + /* Update the visibility of BMEC related files */ + resctrl_bmec_files_show(r, NULL, !enable); + + /* + * Initialize the default memory transaction values for + * total and local events. + */ + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); + /* Enable auto assignment when switching to "mbm_event" mode */ + if (enable) + r->mon.mbm_assign_on_mkdir = true; + /* + * Reset all the non-achitectural RMID state and assignable counters. + */ + list_for_each_entry(d, &r->mon_domains, hdr.list) { + mbm_cntr_free_all(r, d); + resctrl_reset_rmid_all(r, d); + } + } + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} + int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 075a37f79d70..41ce4b377af4 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1807,8 +1807,8 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, * Don't treat kernfs_find_and_get failure as an error, since this function may * be called regardless of whether BMEC is supported or the event is enabled. */ -static void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn, - bool show) +void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn, + bool show) { struct kernfs_node *kn_config, *mon_kn = NULL; char name[32]; @@ -1985,9 +1985,10 @@ static struct rftype res_common_files[] = { }, { .name = "mbm_assign_mode", - .mode = 0444, + .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = resctrl_mbm_assign_mode_show, + .write = resctrl_mbm_assign_mode_write, .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, }, { -- Gitee From 9470a2962909bea5fe66f8c86825ed3e52ac707a Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 5 Sep 2025 16:34:31 -0500 Subject: [PATCH 32/35] x86/resctrl: Configure mbm_event mode if supported ANBZ: #20932 commit 0f1576e43adc62756879a240e66e89ce386b6eb9 upstream. Configure mbm_event mode on AMD platforms. On AMD platforms, it is recommended to use the mbm_event mode, if supported, to prevent the hardware from resetting counters between reads. This can result in misleading values or display "Unavailable" if no counter is assigned to the event. Enable mbm_event mode, known as ABMC (Assignable Bandwidth Monitoring Counters) on AMD, by default if the system supports it. Update ABMC across all logical processors within the resctrl domain to ensure proper functionality. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/cover.1757108044.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/core.c | 7 +++++++ arch/x86/kernel/cpu/resctrl/internal.h | 1 + arch/x86/kernel/cpu/resctrl/monitor.c | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 9edf803964cd..bb6e757f2d46 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -519,6 +519,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = container_of(hdr, struct rdt_mon_domain, hdr); cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + /* Update the mbm_assign_mode state for the CPU if supported */ + if (r->mon.mbm_cntr_assignable) + resctrl_arch_mbm_cntr_assign_set_one(r); return; } @@ -538,6 +541,10 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + /* Update the mbm_assign_mode state for the CPU if supported */ + if (r->mon.mbm_cntr_assignable) + resctrl_arch_mbm_cntr_assign_set_one(r); + arch_mon_domain_online(r, d); if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 93eb9ccc7c53..6da9bd1a188b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -218,5 +218,6 @@ bool rdt_cpu_has(int flag); void __init intel_rdt_mbm_apply_quirk(void); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); +void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 8749e26d5a98..490d582b72aa 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -456,6 +456,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.mbm_cntr_assignable = true; cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; + hw_res->mbm_cntr_assign_enabled = true; } r->mon_capable = true; @@ -557,3 +558,10 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, if (am) memset(am, 0, sizeof(*am)); } + +void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + resctrl_abmc_set_one_amd(&hw_res->mbm_cntr_assign_enabled); +} -- Gitee From 39f3d0cdec02f7f496866332d0ffb5e70a9dc4eb Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Tue, 16 Sep 2025 12:25:49 -0500 Subject: [PATCH 33/35] fs/resctrl: Fix counter auto-assignment on mkdir with mbm_event enabled ANBZ: #20932 commit dd86b69d20fb9fa7e941ed01ff05f1e662fcc3ff upstream. rdt_resource::resctrl_mon::mbm_assign_on_mkdir determines if a counter will automatically be assigned to an RMID, MBM event pair when its associated monitor group is created via mkdir. Testing shows that counters are always automatically assigned to new monitor groups, whether mbm_assign_on_mkdir is set or not. To support automatic counter assignment the check for mbm_assign_on_mkdir should be in rdtgroup_assign_cntrs() that assigns counters during monitor group creation. Instead, the check for mbm_assign_on_mkdir is in rdtgroup_unassign_cntrs() that is called on monitor group deletion from where counters should always be unassigned, whether mbm_assign_on_mkdir is set or not. Fix automatic counter assignment by moving the mbm_assign_on_mkdir check from rdtgroup_unassign_cntrs() to rdtgroup_assign_cntrs(). [ bp: Replace commit message with Reinette's version. ] Fixes: ef712fe97ec57 ("fs/resctrl: Auto assign counters on mkdir and clean up on group removal") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Acked-by: Reinette Chatre Signed-off-by: Qinyun Tan --- fs/resctrl/monitor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 50c24460d992..4076336fbba6 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1200,7 +1200,8 @@ void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r)) + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) || + !r->mon.mbm_assign_on_mkdir) return; if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) @@ -1258,8 +1259,7 @@ void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) || - !r->mon.mbm_assign_on_mkdir) + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r)) return; if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) -- Gitee From 2027d12209d75c5d17cac005a7a99f48dcd03ab2 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 10 Oct 2025 12:08:35 -0500 Subject: [PATCH 34/35] x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID ANBZ: #20932 commit 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 upstream. Users can create as many monitoring groups as the number of RMIDs supported by the hardware. However, on AMD systems, only a limited number of RMIDs are guaranteed to be actively tracked by the hardware. RMIDs that exceed this limit are placed in an "Unavailable" state. When a bandwidth counter is read for such an RMID, the hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable remains set on first read after tracking re-starts and is clear on all subsequent reads as long as the RMID is tracked. resctrl miscounts the bandwidth events after an RMID transitions from the "Unavailable" state back to being tracked. This happens because when the hardware starts counting again after resetting the counter to zero, resctrl in turn compares the new count against the counter value stored from the previous time the RMID was tracked. This results in resctrl computing an event value that is either undercounting (when new counter is more than stored counter) or a mistaken overflow (when new counter is less than stored counter). Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to zero whenever the RMID is in the "Unavailable" state to ensure accurate counting after the RMID resets to zero when it starts to be tracked again. Example scenario that results in mistaken overflow ================================================== 1. The resctrl filesystem is mounted, and a task is assigned to a monitoring group. $mount -t resctrl resctrl /sys/fs/resctrl $mkdir /sys/fs/resctrl/mon_groups/test1/ $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 21323 <- Total bytes on domain 0 "Unavailable" <- Total bytes on domain 1 Task is running on domain 0. Counter on domain 1 is "Unavailable". 2. The task runs on domain 0 for a while and then moves to domain 1. The counter starts incrementing on domain 1. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 7345357 <- Total bytes on domain 0 4545 <- Total bytes on domain 1 3. At some point, the RMID in domain 0 transitions to the "Unavailable" state because the task is no longer executing in that domain. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes "Unavailable" <- Total bytes on domain 0 434341 <- Total bytes on domain 1 4. Since the task continues to migrate between domains, it may eventually return to domain 0. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 17592178699059 <- Overflow on domain 0 3232332 <- Total bytes on domain 1 In this case, the RMID on domain 0 transitions from "Unavailable" state to active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when the counter is read and begins tracking the RMID counting from 0. Subsequent reads succeed but return a value smaller than the previously saved MSR value (7345357). Consequently, the resctrl's overflow logic is triggered, it compares the previous value (7345357) with the new, smaller value and incorrectly interprets this as a counter overflow, adding a large delta. In reality, this is a false positive: the counter did not overflow but was simply reset when the RMID transitioned from "Unavailable" back to active state. Here is the text from APM [1] available from [2]. "In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the first QM_CTR read when it begins tracking an RMID that it was not previously tracking. The U bit will be zero for all subsequent reads from that RMID while it is still tracked by the hardware. Therefore, a QM_CTR read with the U bit set when that RMID is in use by a processor can be considered 0 when calculating the difference with a subsequent read." [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory Bandwidth (MBM). [ bp: Split commit message into smaller paragraph chunks for better consumption. ] Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Reinette Chatre Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/monitor.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 490d582b72aa..7822d7522200 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -241,7 +241,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -250,12 +252,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) -- Gitee From 650a74878f3e25447aa5160fc30bc7ca7fbea69a Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 16 Oct 2025 08:34:19 -0500 Subject: [PATCH 35/35] x86,fs/resctrl: Fix NULL pointer dereference with events force-disabled in mbm_event mode ANBZ: #20932 commit 19de7113bfac33ba92c004a9b510612bb745cfa0 upstream. The following NULL pointer dereference is encountered on mount of resctrl fs after booting a system that supports assignable counters with the "rdt=!mbmtotal,!mbmlocal" kernel parameters: BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:mbm_cntr_get Call Trace: rdtgroup_assign_cntr_event rdtgroup_assign_cntrs rdt_get_tree Specifying the kernel parameter "rdt=!mbmtotal,!mbmlocal" effectively disables the legacy X86_FEATURE_CQM_MBM_TOTAL and X86_FEATURE_CQM_MBM_LOCAL features and the MBM events they represent. This results in the per-domain MBM event related data structures to not be allocated during early initialization. resctrl fs initialization follows by implicitly enabling both MBM total and local events on a system that supports assignable counters (mbm_event mode), but this enabling occurs after the per-domain data structures have been created. After booting, resctrl fs assumes that an enabled event can access all its state. This results in NULL pointer dereference when resctrl attempts to access the un-allocated structures of an enabled event. Remove the late MBM event enabling from resctrl fs. This leaves a problem where the X86_FEATURE_CQM_MBM_TOTAL and X86_FEATURE_CQM_MBM_LOCAL features may be disabled while assignable counter (mbm_event) mode is enabled without any events to support. Switching between the "default" and "mbm_event" mode without any events is not practical. Create a dependency between the X86_FEATURE_{CQM_MBM_TOTAL,CQM_MBM_LOCAL} and X86_FEATURE_ABMC (assignable counter) hardware features. An x86 system that supports assignable counters now requires support of X86_FEATURE_CQM_MBM_TOTAL or X86_FEATURE_CQM_MBM_LOCAL. This ensures all needed MBM related data structures are created before use and that it is only possible to switch between "default" and "mbm_event" mode when the same events are available in both modes. This dependency does not exist in the hardware but this usage of these feature settings work for known systems. [ bp: Massage commit message. ] Fixes: 13390861b426e ("x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details") Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/a62e6ac063d0693475615edd213d5be5e55443e6.1760560934.git.babu.moger@amd.com Signed-off-by: Qinyun Tan --- arch/x86/kernel/cpu/resctrl/monitor.c | 11 ++++++++++- fs/resctrl/monitor.c | 16 +++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 7822d7522200..52c14a7d36da 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -458,7 +458,16 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - if (rdt_cpu_has(X86_FEATURE_ABMC)) { + /* + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. + */ + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { r->mon.mbm_cntr_assignable = true; cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 4076336fbba6..572a9925bd6c 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1782,15 +1782,13 @@ int resctrl_mon_resource_init(void) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; if (r->mon.mbm_cntr_assignable) { - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); - mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; - mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & - (READS_TO_LOCAL_MEM | - READS_TO_LOCAL_S_MEM | - NON_TEMP_WRITE_TO_LOCAL_MEM); + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); -- Gitee