From 84f2daf5dd3adc399300732b29513047d860c059 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 10 Mar 2022 12:48:53 -0800 Subject: [PATCH 01/22] x86/split_lock: Make life miserable for split lockers mainline inclusion from mainline-v5.19-rc1 commit b041b525dab95352fbd666b14dc73ab898df465f bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit b041b525dab95352fbd666b14dc73ab898df465f upstream In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas said: It's simply wishful thinking that stuff gets fixed because of a WARN_ONCE(). This has never worked. The only thing which works is to make stuff fail hard or slow it down in a way which makes it annoying enough to users to complain. He was talking about WBINVD. But it made me think about how we use the split lock detection feature in Linux. Existing code has three options for applications: 1) Don't enable split lock detection (allow arbitrary split locks) 2) Warn once when a process uses split lock, but let the process keep running with split lock detection disabled 3) Kill processes that use split locks Option 2 falls into the "wishful thinking" territory that Thomas warns does nothing. But option 3 might not be viable in a situation with legacy applications that need to run. Hence make option 2 much stricter to "slow it down in a way which makes it annoying". Primary reason for this change is to provide better quality of service to the rest of the applications running on the system. Internal testing shows that even with many processes splitting locks, performance for the rest of the system is much more responsive. The new "warn" mode operates like this. When an application tries to execute a bus lock, the #AC handler: 1) Delays (interruptibly) 10 ms before moving to next step. 2) Blocks (interruptibly) until it can get the semaphore. If interrupted, just return. 
Assume the signal will either kill the task, or direct execution away from the instruction that is trying to get the bus lock. 3) Disables split lock detection for the current core 4) Schedules a work queue to re-enable split lock detect in 2 jiffies 5) Returns The work queue that re-enables split lock detection also releases the semaphore. There is a corner case where a CPU may be taken offline while split lock detection is disabled. A CPU hotplug handler handles this case. Old behaviour was to only print the split lock warning on the first occurrence of a split lock from a task. Preserve that by adding a flag to the task structure that suppresses subsequent split lock messages from that task. Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220310204854.31752-2-tony.luck@intel.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/kernel/cpu/intel.c | 63 +++++++++++++++++++++++++++++++------ include/linux/sched.h | 3 ++ kernel/fork.c | 5 +++ 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a410c00e9603..c5c6c974a6f7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -7,10 +7,13 @@ #include #include #include +#include #include #include #include +#include #include +#include #include #include @@ -1179,6 +1182,8 @@ static const struct { static struct ratelimit_state bld_ratelimit; +static DEFINE_SEMAPHORE(buslock_sem); + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt), ratelimit; @@ -1289,18 +1294,52 @@ static void split_lock_init(void) split_lock_verify_msr(sld_state != sld_off); } +static void __split_lock_reenable(struct work_struct *work) +{ + sld_update_msr(true); + up(&buslock_sem); +} + +/* + * If a CPU goes offline with pending delayed work to re-enable split lock + * detection then the delayed work will be executed on some other CPU. 
That + * handles releasing the buslock_sem, but because it executes on a + * different CPU probably won't re-enable split lock detection. This is a + * problem on HT systems since the sibling CPU on the same core may then be + * left running with split lock detection disabled. + * + * Unconditionally re-enable detection here. + */ +static int splitlock_cpu_offline(unsigned int cpu) +{ + sld_update_msr(true); + + return 0; +} + +static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable); + static void split_lock_warn(unsigned long ip) { - pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", - current->comm, current->pid, ip); + int cpu; - /* - * Disable the split lock detection for this task so it can make - * progress and set TIF_SLD so the detection is re-enabled via - * switch_to_sld() when the task is scheduled out. - */ + if (!current->reported_split_lock) + pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", + current->comm, current->pid, ip); + current->reported_split_lock = 1; + + /* misery factor #1, sleep 10ms before trying to execute split lock */ + if (msleep_interruptible(10) > 0) + return; + /* Misery factor #2, only allow one buslocked disabled core at a time */ + if (down_interruptible(&buslock_sem) == -EINTR) + return; + cpu = get_cpu(); + schedule_delayed_work_on(cpu, &split_lock_reenable, 2); + + /* Disable split lock detection on this CPU to make progress */ sld_update_msr(false); - set_tsk_thread_flag(current, TIF_SLD); + put_cpu(); } bool handle_guest_split_lock(unsigned long ip) @@ -1441,10 +1480,14 @@ static void sld_state_show(void) pr_info("disabled\n"); break; case sld_warn: - if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); - else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + 
"x86/splitlock", NULL, splitlock_cpu_offline) < 0) + pr_warn("No splitlock CPU offline handler\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { pr_info("#DB: warning on user-space bus_locks\n"); + } break; case sld_fatal: if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index c03f65cc0bb5..6aa0549775f5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -888,6 +888,9 @@ struct task_struct { #ifdef CONFIG_IOMMU_SVA KABI_FILL_HOLE(unsigned pasid_activated:1) #endif +#ifdef CONFIG_CPU_SUP_INTEL + unsigned reported_split_lock:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/fork.c b/kernel/fork.c index b4af7886d6e3..b593ed7a3fb3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1020,6 +1020,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->xinfo = NULL; #endif + +#ifdef CONFIG_CPU_SUP_INTEL + tsk->reported_split_lock = 0; +#endif + return tsk; free_stack: -- Gitee From fd789194fd4fecbd2456b358e033fec4c88ac69c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 10 Mar 2022 12:48:54 -0800 Subject: [PATCH 02/22] x86/split-lock: Remove unused TIF_SLD bit mainline inclusion from mainline-v5.19-rc1 commit ef79970d7ccdc4e8855aa6079fc2f4797a6807fb bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit ef79970d7ccdc4e8855aa6079fc2f4797a6807fb upstream Changes to the "warn" mode of split lock handling mean that TIF_SLD is never set. Remove the bit, and the functions that use it. 
Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220310204854.31752-3-tony.luck@intel.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/include/asm/cpu.h | 2 -- arch/x86/include/asm/thread_info.h | 4 +--- arch/x86/kernel/cpu/intel.c | 12 ------------ arch/x86/kernel/process.c | 3 --- 4 files changed, 1 insertion(+), 20 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bdd4bd691e7b..c436b1bd6c7a 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -42,14 +42,12 @@ unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); #ifdef CONFIG_CPU_SUP_INTEL extern void __init sld_setup(struct cpuinfo_x86 *c); -extern void switch_to_sld(unsigned long tifn); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); extern bool handle_guest_split_lock(unsigned long ip); extern void handle_bus_lock(struct pt_regs *regs); u8 get_this_hybrid_cpu_type(void); #else static inline void __init sld_setup(struct cpuinfo_x86 *c) {} -static inline void switch_to_sld(unsigned long tifn) {} static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) { return false; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 5501f825ae27..32647d6204f6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -98,7 +98,6 @@ struct thread_info { #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ -#define TIF_SLD 18 /* Restore split lock detection on context switch */ #define TIF_NOTIFY_SIGNAL 19 /* signal notifications exist */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ @@ -130,7 +129,6 @@ struct thread_info { #define 
_TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) -#define _TIF_SLD (1 << TIF_SLD) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -143,7 +141,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ - _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c5c6c974a6f7..fb48e5a5ba4d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1413,18 +1413,6 @@ void handle_bus_lock(struct pt_regs *regs) } } -/* - * This function is called only when switching between tasks with - * different split-lock detection modes. It sets the MSR for the - * mode of the new task. This is right most of the time, but since - * the MSR is shared by hyperthreads on a physical core there can - * be glitches when the two threads need different modes. - */ -void switch_to_sld(unsigned long tifn) -{ - sld_update_msr(!(tifn & _TIF_SLD)); -} - /* * CPU models that are known to have the per-core split-lock detection * feature even though they do not enumerate IA32_CORE_CAPABILITIES. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f9343b826651..1dbf7151063a 100755 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -691,9 +691,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) /* Enforce MSR update to ensure consistent state */ __speculation_ctrl_update(~tifn, tifn); } - - if ((tifp ^ tifn) & _TIF_SLD) - switch_to_sld(tifn); } /* -- Gitee From 51add2e32ef9102c126623364563b370ba3ac53d Mon Sep 17 00:00:00 2001 From: "Guilherme G. 
Piccoli" Date: Mon, 24 Oct 2022 17:02:54 -0300 Subject: [PATCH 03/22] x86/split_lock: Add sysctl to control the misery mode mainline inclusion from mainline-v6.2-rc1 commit 727209376f4998bc84db1d5d8af15afea846a92b bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 727209376f4998bc84db1d5d8af15afea846a92b upstream Commit b041b525dab9 ("x86/split_lock: Make life miserable for split lockers") changed the way the split lock detector works when in "warn" mode; basically, it not only shows the warn message, but also intentionally introduces a slowdown through a sleep plus a serialization mechanism on such a task. Based on discussions in [0], it seems the warning alone wasn't enough motivation for userspace developers to fix their applications. This slowdown is enough to totally break some proprietary (aka. unfixable) userspace[1]. It happens that originally the proposal in [0] was to add a new mode which would warn + slow down the "split locking" task, keeping the old warn mode untouched. In the end, that idea was discarded and the regular/default "warn" mode now slows down the applications. This is quite aggressive with regard to proprietary/legacy programs that basically are unable to run properly on a kernel with this change. While it is understandable that a malicious application could DoS by split locking, it seems unacceptable to regress old/proprietary userspace programs through a default configuration that previously worked. An example of such breakage was reported in [1]. Add a sysctl to allow controlling the "misery mode" behavior, as per Thomas' suggestion in [2]. This way, users running legacy and/or proprietary software are allowed to still execute them with a decent performance while still observing the warning messages in the kernel log. 
[0] https://lore.kernel.org/lkml/20220217012721.9694-1-tony.luck@intel.com/ [1] https://github.com/doitsujin/dxvk/issues/2938 [2] https://lore.kernel.org/lkml/87pmf4bter.ffs@tglx/ [ dhansen: minor changelog tweaks, including clarifying the actual problem ] Fixes: b041b525dab9 ("x86/split_lock: Make life miserable for split lockers") Suggested-by: Thomas Gleixner Signed-off-by: Guilherme G. Piccoli Signed-off-by: Dave Hansen Reviewed-by: Tony Luck Tested-by: Andre Almeida Link: https://lore.kernel.org/all/20221024200254.635256-1-gpiccoli%40igalia.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- Documentation/admin-guide/sysctl/kernel.rst | 23 ++++++++ arch/x86/kernel/cpu/intel.c | 63 +++++++++++++++++---- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 6a8c60b0cda7..41499c82185a 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1305,6 +1305,29 @@ the watchdog timer function, otherwise the NMI watchdog — if enabled — can detect a hard lockup condition. +split_lock_mitigate (x86 only) +============================== + +On x86, each "split lock" imposes a system-wide performance penalty. On larger +systems, large numbers of split locks from unprivileged users can result in +denials of service to well-behaved and potentially more important users. + +The kernel mitigates these bad users by detecting split locks and imposing +penalties: forcing them to wait and only allowing one core to execute split +locks at a time. + +These mitigations can make those bad applications unbearably slow. Setting +split_lock_mitigate=0 may restore some application performance, but will also +increase system exposure to denial of service attacks from split lock users. 
+ += =================================================================== +0 Disable the mitigation mode - just warns the split lock on kernel log + and exposes the system to denials of service from the split lockers. +1 Enable the mitigation mode (this is the default) - penalizes the split + lockers with intentional performance degradation. += =================================================================== + + stack_erasing ============= diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fb48e5a5ba4d..636b2849ac1e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1182,8 +1182,32 @@ static const struct { static struct ratelimit_state bld_ratelimit; +static unsigned int sysctl_sld_mitigate = 1; static DEFINE_SEMAPHORE(buslock_sem); +#ifdef CONFIG_PROC_SYSCTL +static struct ctl_table sld_sysctls[] = { + { + .procname = "split_lock_mitigate", + .data = &sysctl_sld_mitigate, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init sld_mitigate_sysctl_init(void) +{ + register_sysctl_init("kernel", sld_sysctls); + return 0; +} + +late_initcall(sld_mitigate_sysctl_init); +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt), ratelimit; @@ -1294,12 +1318,20 @@ static void split_lock_init(void) split_lock_verify_msr(sld_state != sld_off); } -static void __split_lock_reenable(struct work_struct *work) +static void __split_lock_reenable_unlock(struct work_struct *work) { sld_update_msr(true); up(&buslock_sem); } +static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock); + +static void __split_lock_reenable(struct work_struct *work) +{ + sld_update_msr(true); +} +static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); + /* * If a CPU goes offline with pending delayed work to re-enable split lock * detection then the 
delayed work will be executed on some other CPU. That @@ -1317,10 +1349,9 @@ static int splitlock_cpu_offline(unsigned int cpu) return 0; } -static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable); - static void split_lock_warn(unsigned long ip) { + struct delayed_work *work; int cpu; if (!current->reported_split_lock) @@ -1328,14 +1359,26 @@ static void split_lock_warn(unsigned long ip) current->comm, current->pid, ip); current->reported_split_lock = 1; - /* misery factor #1, sleep 10ms before trying to execute split lock */ - if (msleep_interruptible(10) > 0) - return; - /* Misery factor #2, only allow one buslocked disabled core at a time */ - if (down_interruptible(&buslock_sem) == -EINTR) - return; + if (sysctl_sld_mitigate) { + /* + * misery factor #1: + * sleep 10ms before trying to execute split lock. + */ + if (msleep_interruptible(10) > 0) + return; + /* + * Misery factor #2: + * only allow one buslocked disabled core at a time. + */ + if (down_interruptible(&buslock_sem) == -EINTR) + return; + work = &sl_reenable_unlock; + } else { + work = &sl_reenable; + } + cpu = get_cpu(); - schedule_delayed_work_on(cpu, &split_lock_reenable, 2); + schedule_delayed_work_on(cpu, work, 2); /* Disable split lock detection on this CPU to make progress */ sld_update_msr(false); -- Gitee From 22e9d9ecc4c2b7d63dff63a123db8e8ed3d9d9d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Mar 2023 12:14:42 +0200 Subject: [PATCH 04/22] Change DEFINE_SEMAPHORE() to take a number argument mainline inclusion from mainline-v6.4-rc1 commit 48380368dec14859723b9e3fbd43e042638d9a76 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 48380368dec14859723b9e3fbd43e042638d9a76 upstream Fundamentally semaphores are a counted primitive, but DEFINE_SEMAPHORE() does not expose this and explicitly creates a binary semaphore. 
Change DEFINE_SEMAPHORE() to take a number argument and use that in the few places that open-coded it using __SEMAPHORE_INITIALIZER(). [Backport Changes] 1. In the non-upstream file drivers/net/ethernet/huawei/bma/kbox_drv/kbox_ram_op.c, added in commit a5b9149cc6ef0, the macro DEFINE_SEMAPHORE() was updated to accept an initializer argument (i.e., DEFINE_SEMAPHORE(user_sem, 1)), in alignment with the current upstream commit, which modifies the DEFINE_SEMAPHORE() macro to require an initial value. This change ensures compatibility with the updated macro definition. 2. In the non-upstream file drivers/net/ethernet/linkdata/sxe/sxepf/sxe_host_hdc.c, added in commit 8cee206b55582, the conditional block using #ifdef DEFINE_SEMAPHORE_NEED_CNT and the legacy DEFINE_SEMAPHORE(g_hdc_sema) definition were removed. This is because the current upstream commit redefines DEFINE_SEMAPHORE() to always require an initializer count, making this workaround macro unnecessary. This change ensures compatibility with the updated macro definition. 3. In the current backport, changes intended for drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c were instead applied to the existing file drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c, since the upstream commit 7b9148dcb74a00 (staging: vchiq: Combine vchiq platform code into single file), which combines the vchiq platform initialization code into a single file by merging vchiq_2835_arm.c into vchiq_arm.c, has not been applied in the current source tree. Backporting commit 7b9148dcb74a00 introduces additional dependencies and unnecessary conflicts. So, the necessary DEFINE_SEMAPHORE() macro changes were applied directly to vchiq_2835_arm.c. 
Signed-off-by: Peter Zijlstra (Intel) [mcgrof: add some tribal knowledge about why some folks prefer binary sempahores over mutexes] Reviewed-by: Sergey Senozhatsky Reviewed-by: Davidlohr Bueso Signed-off-by: Luis Chamberlain Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/mips/cavium-octeon/setup.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- drivers/firmware/efi/runtime-wrappers.c | 2 +- drivers/firmware/efi/vars.c | 2 +- drivers/macintosh/adb.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- drivers/net/ethernet/huawei/bma/kbox_drv/kbox_ram_op.c | 2 +- drivers/net/ethernet/linkdata/sxe/sxepf/sxe_host_hdc.c | 4 ---- drivers/platform/x86/intel/ifs/sysfs.c | 2 +- drivers/scsi/esas2r/esas2r_ioctl.c | 2 +- .../vc04_services/interface/vchiq_arm/vchiq_2835_arm.c | 2 +- include/linux/semaphore.h | 10 ++++++++-- kernel/printk/printk.c | 2 +- net/rxrpc/call_object.c | 6 ++---- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index b329cdb6134d..14f9aa82d040 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -72,7 +72,7 @@ extern void pci_console_init(const char *arg); static unsigned long long max_memory = ULLONG_MAX; static unsigned long long reserve_low_mem; -DEFINE_SEMAPHORE(octeon_bootbus_sem); +DEFINE_SEMAPHORE(octeon_bootbus_sem, 1); EXPORT_SYMBOL(octeon_bootbus_sem); static struct octeon_boot_descriptor *octeon_boot_desc_ptr; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 636b2849ac1e..08d8729e8c08 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1183,7 +1183,7 @@ static const struct { static struct ratelimit_state bld_ratelimit; static unsigned int sysctl_sld_mitigate = 1; -static DEFINE_SEMAPHORE(buslock_sem); +static DEFINE_SEMAPHORE(buslock_sem, 1); #ifdef CONFIG_PROC_SYSCTL static struct ctl_table sld_sysctls[] = { diff --git 
a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 60075e0e4943..1a43bba92ca2 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -157,7 +157,7 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) * none of the remaining functions are actually ever called at runtime. * So let's just use a single lock to serialize all Runtime Services calls. */ -static DEFINE_SEMAPHORE(efi_runtime_lock); +static DEFINE_SEMAPHORE(efi_runtime_lock, 1); /* * Expose the EFI runtime lock to the UV platform diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index eaed1ddcc803..d1789135498e 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -30,7 +30,7 @@ static struct efivars *__efivars; * 2) ->ops calls * 3) (un)registration of __efivars */ -static DEFINE_SEMAPHORE(efivars_lock); +static DEFINE_SEMAPHORE(efivars_lock, 1); static bool validate_device_path(efi_char16_t *var_name, int match, u8 *buffer, diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index afb0942ccc29..6ae96ba9a260 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -80,7 +80,7 @@ static struct adb_driver *adb_controller; BLOCKING_NOTIFIER_HEAD(adb_client_list); static int adb_got_sleep; static int adb_inited; -static DEFINE_SEMAPHORE(adb_probe_mutex); +static DEFINE_SEMAPHORE(adb_probe_mutex, 1); static int sleepy_trackpad; static int autopoll_devs; int __adb_probe_sync; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c8cbf3ed128d..cfae04862ac0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -298,7 +298,7 @@ const u32 dmae_reg_go_c[] = { /* Global resources for unloading a previously loaded device */ #define BNX2X_PREV_WAIT_NEEDED 1 -static DEFINE_SEMAPHORE(bnx2x_prev_sem); +static 
DEFINE_SEMAPHORE(bnx2x_prev_sem, 1); static LIST_HEAD(bnx2x_prev_list); /* Forward declaration */ diff --git a/drivers/net/ethernet/huawei/bma/kbox_drv/kbox_ram_op.c b/drivers/net/ethernet/huawei/bma/kbox_drv/kbox_ram_op.c index 9f6dfe55e3fb..57723f9e1d7a 100644 --- a/drivers/net/ethernet/huawei/bma/kbox_drv/kbox_ram_op.c +++ b/drivers/net/ethernet/huawei/bma/kbox_drv/kbox_ram_op.c @@ -30,7 +30,7 @@ #endif static DEFINE_SPINLOCK(g_kbox_super_block_lock); -static DEFINE_SEMAPHORE(user_sem); +static DEFINE_SEMAPHORE(user_sem, 1); union char_int_transfer_u { int data_int; diff --git a/drivers/net/ethernet/linkdata/sxe/sxepf/sxe_host_hdc.c b/drivers/net/ethernet/linkdata/sxe/sxepf/sxe_host_hdc.c index 43bcfa9eb7b1..5bf55b57d4cd 100644 --- a/drivers/net/ethernet/linkdata/sxe/sxepf/sxe_host_hdc.c +++ b/drivers/net/ethernet/linkdata/sxe/sxepf/sxe_host_hdc.c @@ -36,11 +36,7 @@ static DEFINE_PER_CPU(union sxe_trace_info, sxe_trace_id); #define NS_TO_MS_UNIT (1000000) -#ifdef DEFINE_SEMAPHORE_NEED_CNT DEFINE_SEMAPHORE(g_hdc_sema, 1); -#else -DEFINE_SEMAPHORE(g_hdc_sema); -#endif static void sxe_trace_id_alloc(u64 *trace_id) { diff --git a/drivers/platform/x86/intel/ifs/sysfs.c b/drivers/platform/x86/intel/ifs/sysfs.c index d856d6b8fc03..01b7502f46b0 100644 --- a/drivers/platform/x86/intel/ifs/sysfs.c +++ b/drivers/platform/x86/intel/ifs/sysfs.c @@ -13,7 +13,7 @@ * Protects against simultaneous tests on multiple cores, or * reloading can file while a test is in progress */ -static DEFINE_SEMAPHORE(ifs_sem); +static DEFINE_SEMAPHORE(ifs_sem, 1); /* * The sysfs interface to check additional details of last test diff --git a/drivers/scsi/esas2r/esas2r_ioctl.c b/drivers/scsi/esas2r/esas2r_ioctl.c index 08f4e43c7d9e..b8faaecc5ae7 100644 --- a/drivers/scsi/esas2r/esas2r_ioctl.c +++ b/drivers/scsi/esas2r/esas2r_ioctl.c @@ -56,7 +56,7 @@ dma_addr_t esas2r_buffered_ioctl_addr; u32 esas2r_buffered_ioctl_size; struct pci_dev *esas2r_buffered_ioctl_pcid; -static 
DEFINE_SEMAPHORE(buffered_ioctl_semaphore); +static DEFINE_SEMAPHORE(buffered_ioctl_semaphore, 1); typedef int (*BUFFERED_IOCTL_CALLBACK)(struct esas2r_adapter *, struct esas2r_request *, struct esas2r_sg_context *, diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index 8782ebe0b39a..56a574daa6fb 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -64,7 +64,7 @@ static char *g_free_fragments; static struct semaphore g_free_fragments_sema; static struct device *g_dev; -static DEFINE_SEMAPHORE(g_free_fragments_mutex); +static DEFINE_SEMAPHORE(g_free_fragments_mutex, 1); static irqreturn_t vchiq_doorbell_irq(int irq, void *dev_id); diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 6694d0019a68..04655faadc2d 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -25,8 +25,14 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } -#define DEFINE_SEMAPHORE(name) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) +/* + * Unlike mutexes, binary semaphores do not have an owner, so up() can + * be called in a different thread from the one which called down(). + * It is also safe to call down_trylock() and up() from interrupt + * context. + */ +#define DEFINE_SEMAPHORE(_name, _n) \ + struct semaphore _name = __SEMAPHORE_INITIALIZER(_name, _n) static inline void sema_init(struct semaphore *sem, int val) { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b1795f846f94..c1edf3eb2ea6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -83,7 +83,7 @@ EXPORT_SYMBOL(oops_in_progress); * provides serialisation for access to the entire console * driver system. 
*/ -static DEFINE_SEMAPHORE(console_sem); +static DEFINE_SEMAPHORE(console_sem, 1); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 10dad2834d5b..792dbef1814a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -40,10 +40,8 @@ const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { struct kmem_cache *rxrpc_call_jar; -static struct semaphore rxrpc_call_limiter = - __SEMAPHORE_INITIALIZER(rxrpc_call_limiter, 1000); -static struct semaphore rxrpc_kernel_call_limiter = - __SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000); +static DEFINE_SEMAPHORE(rxrpc_call_limiter, 1000); +static DEFINE_SEMAPHORE(rxrpc_kernel_call_limiter, 1000); static void rxrpc_call_timer_expired(struct timer_list *t) { -- Gitee From c84afab5097487c27d144407e232be9ec236fe8a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 16 Apr 2024 14:19:03 -0700 Subject: [PATCH 05/22] x86/cpu/vfm: Add/initialize x86_vfm field to struct cpuinfo_x86 mainline inclusion from mainline-v6.10-rc1 commit a9d0adce69075192961f3be466c4810a21b7bc9e bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit a9d0adce69075192961f3be466c4810a21b7bc9e upstream Refactor struct cpuinfo_x86 so that the vendor, family, and model fields are overlaid in a union with a 32-bit field that combines all three (together with a one byte reserved field in the upper byte). This will make it easy, cheap, and reliable to check all three values at once. See https://lore.kernel.org/r/Zgr6kT8oULbnmEXx@agluck-desk3 for why the ordering is (low-to-high bits): (vendor, family, model) [ bp: Move comments over the line, add the backstory about the particular order of the fields. 
] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240416211941.9369-2-tony.luck@intel.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/include/asm/processor.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9fe94ae7a6ce..44f8df2c0dae 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -80,9 +80,23 @@ extern u16 __read_mostly tlb_lld_1g[NR_INFO]; */ struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; + union { + /* + * The particular ordering (low-to-high) of (vendor, + * family, model) is done in case range of models, like + * it is usually done on AMD, need to be compared. + */ + struct { + __u8 x86_model; + /* CPU family */ + __u8 x86; + /* CPU vendor */ + __u8 x86_vendor; + __u8 x86_reserved; + }; + /* combined vendor, family, model */ + __u32 x86_vfm; + }; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ -- Gitee From ef1b7ce005aa37db2c9483fcceadfccfcf7cf6b3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 16 Apr 2024 14:19:04 -0700 Subject: [PATCH 06/22] x86/cpu/vfm: Add new macros to work with (vendor/family/model) values mainline inclusion from mainline-v6.10-rc1 commit e6dfdc2e89a0adedf455814c91b977d6a584cc88 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit e6dfdc2e89a0adedf455814c91b977d6a584cc88 upstream To avoid adding a slew of new macros for each new Intel CPU family switch over from providing CPU model number #defines to a new scheme that encodes vendor, family, and model in a single number. 
[ bp: s/casted/cast/g ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240416211941.9369-3-tony.luck@intel.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/include/asm/cpu_device_id.h | 93 ++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index eb8fcede9e3b..dd7b9463696f 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -2,6 +2,39 @@ #ifndef _ASM_X86_CPU_DEVICE_ID #define _ASM_X86_CPU_DEVICE_ID +/* + * Can't use <linux/bitfield.h> because it generates expressions that + * cannot be used in structure initializers. Bitfield construction + * here must match the union in struct cpuinfo_x86: + * union { + * struct { + * __u8 x86_model; + * __u8 x86; + * __u8 x86_vendor; + * __u8 x86_reserved; + * }; + * __u32 x86_vfm; + * }; + */ +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) + /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions @@ -49,6 +82,16 @@ .driver_data = (unsigned long) _data \ } +#define X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = _vendor, \ + .family = _family, \ + .model =
_model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + /** * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY @@ -164,6 +207,56 @@ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ steppings, X86_FEATURE_ANY, data) +/** + * X86_MATCH_VFM - Match encoded vendor/family/model + * @vfm: Encoded 8-bits each for vendor, family, model + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + * + * Stepping and feature are set to wildcards + */ +#define X86_MATCH_VFM(vfm, data) \ + X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ + VFM_VENDOR(vfm), \ + VFM_FAMILY(vfm), \ + VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VFM_STEPPINGS - Match encoded vendor/family/model/stepping + * @vfm: Encoded 8-bits each for vendor, family, model + * @steppings: Bitmask of steppings to match + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + * + * feature is set to wildcard + */ +#define X86_MATCH_VFM_STEPPINGS(vfm, steppings, data) \ + X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ + VFM_VENDOR(vfm), \ + VFM_FAMILY(vfm), \ + VFM_MODEL(vfm), \ + steppings, X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature + * @vfm: Encoded 8-bits each for vendor, family, model + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. 
+ * + * Steppings is set to wildcard + */ +#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ + X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ + VFM_VENDOR(vfm), \ + VFM_FAMILY(vfm), \ + VFM_MODEL(vfm), \ + X86_STEPPING_ANY, feature, data) + /* * Match specific microcode revisions. * -- Gitee From abcd39f8e58b2501223036cb49da940511d5b0be Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 16 Apr 2024 14:19:05 -0700 Subject: [PATCH 07/22] x86/cpu/vfm: Update arch/x86/include/asm/intel-family.h mainline inclusion from mainline-v6.10-rc1 commit f055b6260eb3ef20a6e310d1e555a5d5a0a28ca0 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit f055b6260eb3ef20a6e310d1e555a5d5a0a28ca0 upstream New CPU #defines encode vendor and family as well as model. Update the example usage comment in arch/x86/kernel/cpu/match.c Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240416211941.9369-4-tony.luck@intel.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/include/asm/intel-family.h | 84 +++++++++++++++++++++++++++++ arch/x86/kernel/cpu/match.c | 3 +- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 1eae393a81a9..ef13cd99e85d 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -38,128 +38,212 @@ * their own names :-( */ +#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model) + /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ #define INTEL_FAM6_ANY X86_MODEL_ANY +/* Wildcard match for FAM6 so X86_MATCH_VFM(ANY) works */ +#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) #define INTEL_FAM6_CORE_YONAH 0x0E +#define INTEL_CORE_YONAH IFM(6, 0x0E) #define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_CORE2_MEROM IFM(6, 0x0F) #define INTEL_FAM6_CORE2_MEROM_L 0x16 
+#define INTEL_CORE2_MEROM_L IFM(6, 0x16) #define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_CORE2_PENRYN IFM(6, 0x17) #define INTEL_FAM6_CORE2_DUNNINGTON 0x1D +#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D) #define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_NEHALEM IFM(6, 0x1E) #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ +#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */ #define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_NEHALEM_EP IFM(6, 0x1A) #define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_NEHALEM_EX IFM(6, 0x2E) #define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_WESTMERE IFM(6, 0x25) #define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_WESTMERE_EP IFM(6, 0x2C) #define INTEL_FAM6_WESTMERE_EX 0x2F +#define INTEL_WESTMERE_EX IFM(6, 0x2F) #define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_SANDYBRIDGE IFM(6, 0x2A) #define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D) #define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_IVYBRIDGE IFM(6, 0x3A) #define INTEL_FAM6_IVYBRIDGE_X 0x3E +#define INTEL_IVYBRIDGE_X IFM(6, 0x3E) #define INTEL_FAM6_HASWELL 0x3C +#define INTEL_HASWELL IFM(6, 0x3C) #define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_HASWELL_X IFM(6, 0x3F) #define INTEL_FAM6_HASWELL_L 0x45 +#define INTEL_HASWELL_L IFM(6, 0x45) #define INTEL_FAM6_HASWELL_G 0x46 +#define INTEL_HASWELL_G IFM(6, 0x46) #define INTEL_FAM6_BROADWELL 0x3D +#define INTEL_BROADWELL IFM(6, 0x3D) #define INTEL_FAM6_BROADWELL_G 0x47 +#define INTEL_BROADWELL_G IFM(6, 0x47) #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_BROADWELL_X IFM(6, 0x4F) #define INTEL_FAM6_BROADWELL_D 0x56 +#define INTEL_BROADWELL_D IFM(6, 0x56) #define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */ +#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */ #define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */ +#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */ #define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */ +#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */ #define INTEL_FAM6_KABYLAKE_L 0x8E 
/* Sky Lake */ +#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */ /* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ /* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ /* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ #define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */ +#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */ /* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ #define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */ +#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */ #define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */ +#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */ #define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */ +#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */ #define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */ +#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */ +#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */ +#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ +#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ +#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */ #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ +#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ +#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ +#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ +#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */ #define INTEL_FAM6_EMERALDRAPIDS_X 0xCF +#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) #define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) #define INTEL_FAM6_GRANITERAPIDS_D 0xAE +#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) /* "Hybrid" Processors (P-Core/E-Core) */ #define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ 
+#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */ #define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ +#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */ #define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_RAPTORLAKE_P IFM(6, 0xBA) +#define INTEL_RAPTORLAKE_S IFM(6, 0xBF) #define INTEL_FAM6_METEORLAKE 0xAC +#define INTEL_METEORLAKE IFM(6, 0xAC) #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_METEORLAKE_L IFM(6, 0xAA) +#define INTEL_ARROWLAKE_H IFM(6, 0xC5) +#define INTEL_ARROWLAKE IFM(6, 0xC6) +#define INTEL_ARROWLAKE_U IFM(6, 0xB5) +#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ +#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */ #define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ +#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */ #define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ +#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */ #define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ +#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */ #define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ +#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */ #define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */ +#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */ #define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ +#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */ 
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ +#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */ #define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */ #define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */ +#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ +#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */ +#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */ /* Note: the micro-architecture is "Goldmont Plus" */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ +#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ +#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */ #define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ +#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */ #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ +#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ +#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */ #define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ +#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ +#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */ /* Family 5 */ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ +#define 
INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index ad6776081e60..2243083f0bc2 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -17,8 +17,7 @@ * * A typical table entry would be to match a specific CPU * - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, - * X86_FEATURE_ANY, NULL); + * X86_MATCH_VFM_FEATURE(INTEL_BROADWELL, X86_FEATURE_ANY, NULL); * * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) -- Gitee From 9894c3afb21218ad7b9905bfc053057d47309573 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 May 2024 15:45:59 -0700 Subject: [PATCH 08/22] x86/cpu/intel: Switch to new Intel CPU model defines mainline inclusion from mainline-v6.11-rc1 commit 6568fc18c2f62b4f35092e9680fe39f3500f4767 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 6568fc18c2f62b4f35092e9680fe39f3500f4767 upstream New CPU #defines encode vendor and family as well as model. 
Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240520224620.9480-29-tony.luck%40intel.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/kernel/cpu/intel.c | 108 ++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 55 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 08d8729e8c08..059e9cecbf0c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -72,19 +72,19 @@ static bool cpu_model_supports_sld __ro_after_init; */ static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) { - switch (c->x86_model) { - case INTEL_FAM6_CORE_YONAH: - case INTEL_FAM6_CORE2_MEROM: - case INTEL_FAM6_CORE2_MEROM_L: - case INTEL_FAM6_CORE2_PENRYN: - case INTEL_FAM6_CORE2_DUNNINGTON: - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_NEHALEM_G: - case INTEL_FAM6_NEHALEM_EP: - case INTEL_FAM6_NEHALEM_EX: - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_WESTMERE_EP: - case INTEL_FAM6_SANDYBRIDGE: + switch (c->x86_vfm) { + case INTEL_CORE_YONAH: + case INTEL_CORE2_MEROM: + case INTEL_CORE2_MEROM_L: + case INTEL_CORE2_PENRYN: + case INTEL_CORE2_DUNNINGTON: + case INTEL_NEHALEM: + case INTEL_NEHALEM_G: + case INTEL_NEHALEM_EP: + case INTEL_NEHALEM_EX: + case INTEL_WESTMERE: + case INTEL_WESTMERE_EP: + case INTEL_SANDYBRIDGE: setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); } } @@ -106,9 +106,9 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) */ if (c->x86 != 6) return; - switch (c->x86_model) { - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: + switch (c->x86_vfm) { + case INTEL_XEON_PHI_KNL: + case INTEL_XEON_PHI_KNM: break; default: return; @@ -134,32 +134,32 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) * - Release note from 20180108 microcode release */ struct sku_microcode { - u8 model; + u32 vfm; u8 stepping; u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { 
INTEL_FAM6_KABYLAKE, 0x0B, 0x80 }, - { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE, 0x09, 0x80 }, - { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 }, - { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, - { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_BROADWELL, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 }, - { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 }, - { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, - { INTEL_FAM6_HASWELL_L, 0x01, 0x21 }, - { INTEL_FAM6_HASWELL_G, 0x01, 0x18 }, - { INTEL_FAM6_HASWELL, 0x03, 0x23 }, - { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, - { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, - { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + { INTEL_KABYLAKE, 0x0B, 0x80 }, + { INTEL_KABYLAKE, 0x0A, 0x80 }, + { INTEL_KABYLAKE, 0x09, 0x80 }, + { INTEL_KABYLAKE_L, 0x0A, 0x80 }, + { INTEL_KABYLAKE_L, 0x09, 0x80 }, + { INTEL_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_BROADWELL, 0x04, 0x28 }, + { INTEL_BROADWELL_G, 0x01, 0x1b }, + { INTEL_BROADWELL_D, 0x02, 0x14 }, + { INTEL_BROADWELL_D, 0x03, 0x07000011 }, + { INTEL_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_HASWELL_L, 0x01, 0x21 }, + { INTEL_HASWELL_G, 0x01, 0x18 }, + { INTEL_HASWELL, 0x03, 0x23 }, + { INTEL_HASWELL_X, 0x02, 0x3b }, + { INTEL_HASWELL_X, 0x04, 0x10 }, + { INTEL_IVYBRIDGE_X, 0x04, 0x42a }, /* Observed in the wild */ - { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, - { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, + { INTEL_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_SANDYBRIDGE_X, 0x07, 0x712 }, }; static bool bad_spectre_microcode(struct cpuinfo_x86 *c) @@ -173,11 +173,8 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; - if (c->x86 != 6) - return false; - for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { - if (c->x86_model == spectre_bad_microcodes[i].model && + if (c->x86_vfm == 
spectre_bad_microcodes[i].vfm && c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } @@ -403,7 +400,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && + if (c->x86_vfm == INTEL_ATOM_BONNELL && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -436,11 +433,11 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ if (c->x86 == 6) { - switch (c->x86_model) { - case INTEL_FAM6_ATOM_SALTWELL_MID: - case INTEL_FAM6_ATOM_SALTWELL_TABLET: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT_NP: + switch (c->x86_vfm) { + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT_NP: set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); break; default: @@ -484,7 +481,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE * to be modified. 
*/ - if (c->x86 == 5 && c->x86_model == 9) { + if (c->x86_vfm == INTEL_QUARK_X1000) { pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } @@ -831,12 +828,13 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && - (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) + if (boot_cpu_has(X86_FEATURE_CLFLUSH) && + (c->x86_vfm == INTEL_CORE2_DUNNINGTON || + c->x86_vfm == INTEL_NEHALEM_EX || + c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); - if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) && - ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT))) + if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 @@ -1461,9 +1459,9 @@ void handle_bus_lock(struct pt_regs *regs) * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), {} }; -- Gitee From 23ef7ceb082c7a5e56c0d17e3f4681cce4d02ac7 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 8 Aug 2024 06:29:34 +0000 Subject: [PATCH 09/22] x86/split_lock: Move Split and Bus lock code to a dedicated file mainline inclusion from mainline-v6.13-rc1 commit 350afa8a1101f62ce31bc4ed6f69cf4b90ec4fa2 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 350afa8a1101f62ce31bc4ed6f69cf4b90ec4fa2 upstream Bus Lock Detect functionality on AMD platforms works identical to Intel. Move split_lock and bus_lock specific code from intel.c to a dedicated file so that it can be compiled and supported on non-Intel platforms. 
Also, introduce CONFIG_X86_BUS_LOCK_DETECT, make it dependent on CONFIG_CPU_SUP_INTEL and add compilation dependency of the new bus_lock.c file on CONFIG_X86_BUS_LOCK_DETECT. [Backport Changes] 1. In file arch/x86/kernel/cpu/bus_lock.c, in sld_sysctls[], an array of struct ctl_table, the empty initializer '{}' has been retained. Initializer {} sets all members of the structure to zero/Null. Retain this addition because, in openEuler source code, the size of ctl_table array has not been explicitly defined. Signed-off-by: Ravi Bangoria Signed-off-by: Thomas Gleixner Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/all/20240808062937.1149-2-ravi.bangoria@amd.com Signed-off-by: Abhishek Rajput Signed-off-by: mohanasv2 --- arch/x86/Kconfig | 8 + arch/x86/include/asm/cpu.h | 11 +- arch/x86/kernel/cpu/Makefile | 2 + arch/x86/kernel/cpu/bus_lock.c | 407 +++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 407 --------------------------------- include/linux/sched.h | 2 +- kernel/fork.c | 2 +- 7 files changed, 428 insertions(+), 411 deletions(-) create mode 100644 arch/x86/kernel/cpu/bus_lock.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 319a4c0fdbac..ad95ecf7ed54 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2516,6 +2516,14 @@ config STRICT_SIGALTSTACK_SIZE source "kernel/livepatch/Kconfig" +config X86_BUS_LOCK_DETECT + bool "Split Lock Detect and Bus Lock Detect support" + depends on CPU_SUP_INTEL + default y + help + Enable Split Lock Detect and Bus Lock Detect functionalities. + See for more information.
+ endmenu config CC_HAS_SLS diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index c436b1bd6c7a..243affcb3063 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -40,12 +40,13 @@ int mwait_usable(const struct cpuinfo_x86 *); unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT extern void __init sld_setup(struct cpuinfo_x86 *c); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); extern bool handle_guest_split_lock(unsigned long ip); extern void handle_bus_lock(struct pt_regs *regs); -u8 get_this_hybrid_cpu_type(void); +void split_lock_init(void); +void bus_lock_init(void); #else static inline void __init sld_setup(struct cpuinfo_x86 *c) {} static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) @@ -59,7 +60,13 @@ static inline bool handle_guest_split_lock(unsigned long ip) } static inline void handle_bus_lock(struct pt_regs *regs) {} +static inline void split_lock_init(void) {} +static inline void bus_lock_init(void) {} +#endif +#ifdef CONFIG_CPU_SUP_INTEL +u8 get_this_hybrid_cpu_type(void); +#else static inline u8 get_this_hybrid_cpu_type(void) { return 0; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 637b499450d1..d4c3b88e7337 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -56,6 +56,8 @@ obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o obj-$(CONFIG_ACRN_GUEST) += acrn.o ifdef CONFIG_X86_FEATURE_NAMES +obj-$(CONFIG_X86_BUS_LOCK_DETECT) += bus_lock.o + quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c new file mode 100644 index 000000000000..7b8b0f595d1b --- /dev/null +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -0,0 +1,407 @@ +// 
SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "x86/split lock detection: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +enum split_lock_detect_state { + sld_off = 0, + sld_warn, + sld_fatal, + sld_ratelimit, +}; + +/* + * Default to sld_off because most systems do not support split lock detection. + * sld_state_setup() will switch this to sld_warn on systems that support + * split lock/bus lock detect, unless there is a command line override. + */ +static enum split_lock_detect_state sld_state __ro_after_init = sld_off; +static u64 msr_test_ctrl_cache __ro_after_init; + +/* + * With a name like MSR_TEST_CTL it should go without saying, but don't touch + * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it + * on CPUs that do not support SLD can cause fireworks, even when writing '0'. + */ +static bool cpu_model_supports_sld __ro_after_init; + +static const struct { + const char *option; + enum split_lock_detect_state state; +} sld_options[] __initconst = { + { "off", sld_off }, + { "warn", sld_warn }, + { "fatal", sld_fatal }, + { "ratelimit:", sld_ratelimit }, +}; + +static struct ratelimit_state bld_ratelimit; + +static unsigned int sysctl_sld_mitigate = 1; +static DEFINE_SEMAPHORE(buslock_sem, 1); + +#ifdef CONFIG_PROC_SYSCTL +static struct ctl_table sld_sysctls[] = { + { + .procname = "split_lock_mitigate", + .data = &sysctl_sld_mitigate, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init sld_mitigate_sysctl_init(void) +{ + register_sysctl_init("kernel", sld_sysctls); + return 0; +} + +late_initcall(sld_mitigate_sysctl_init); +#endif + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt), ratelimit; + + if (strncmp(arg, opt, len)) + return false; + + /* + * Min ratelimit is 1 bus lock/sec. 
+ * Max ratelimit is 1000 bus locks/sec. + */ + if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && + ratelimit > 0 && ratelimit <= 1000) { + ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); + ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); + return true; + } + + return len == arglen; +} + +static bool split_lock_verify_msr(bool on) +{ + u64 ctrl, tmp; + + if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl)) + return false; + if (on) + ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; + else + ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT; + if (wrmsrl_safe(MSR_TEST_CTRL, ctrl)) + return false; + rdmsrl(MSR_TEST_CTRL, tmp); + return ctrl == tmp; +} + +static void __init sld_state_setup(void) +{ + enum split_lock_detect_state state = sld_warn; + char arg[20]; + int i, ret; + + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + return; + + ret = cmdline_find_option(boot_command_line, "split_lock_detect", + arg, sizeof(arg)); + if (ret >= 0) { + for (i = 0; i < ARRAY_SIZE(sld_options); i++) { + if (match_option(arg, ret, sld_options[i].option)) { + state = sld_options[i].state; + break; + } + } + } + sld_state = state; +} + +static void __init __split_lock_setup(void) +{ + if (!split_lock_verify_msr(false)) { + pr_info("MSR access failed: Disabled\n"); + return; + } + + rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); + + if (!split_lock_verify_msr(true)) { + pr_info("MSR access failed: Disabled\n"); + return; + } + + /* Restore the MSR to its cached value. */ + wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); + + setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); +} + +/* + * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking + * is not implemented as one thread could undo the setting of the other + * thread immediately after dropping the lock anyway. 
+ */ +static void sld_update_msr(bool on) +{ + u64 test_ctrl_val = msr_test_ctrl_cache; + + if (on) + test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; + + wrmsrl(MSR_TEST_CTRL, test_ctrl_val); +} + +void split_lock_init(void) +{ + /* + * #DB for bus lock handles ratelimit and #AC for split lock is + * disabled. + */ + if (sld_state == sld_ratelimit) { + split_lock_verify_msr(false); + return; + } + + if (cpu_model_supports_sld) + split_lock_verify_msr(sld_state != sld_off); +} + +static void __split_lock_reenable_unlock(struct work_struct *work) +{ + sld_update_msr(true); + up(&buslock_sem); +} + +static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock); + +static void __split_lock_reenable(struct work_struct *work) +{ + sld_update_msr(true); +} +static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); + +/* + * If a CPU goes offline with pending delayed work to re-enable split lock + * detection then the delayed work will be executed on some other CPU. That + * handles releasing the buslock_sem, but because it executes on a + * different CPU probably won't re-enable split lock detection. This is a + * problem on HT systems since the sibling CPU on the same core may then be + * left running with split lock detection disabled. + * + * Unconditionally re-enable detection here. + */ +static int splitlock_cpu_offline(unsigned int cpu) +{ + sld_update_msr(true); + + return 0; +} + +static void split_lock_warn(unsigned long ip) +{ + struct delayed_work *work; + int cpu; + + if (!current->reported_split_lock) + pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", + current->comm, current->pid, ip); + current->reported_split_lock = 1; + + if (sysctl_sld_mitigate) { + /* + * misery factor #1: + * sleep 10ms before trying to execute split lock. + */ + if (msleep_interruptible(10) > 0) + return; + /* + * Misery factor #2: + * only allow one buslocked disabled core at a time. 
+ */ + if (down_interruptible(&buslock_sem) == -EINTR) + return; + work = &sl_reenable_unlock; + } else { + work = &sl_reenable; + } + + cpu = get_cpu(); + schedule_delayed_work_on(cpu, work, 2); + + /* Disable split lock detection on this CPU to make progress */ + sld_update_msr(false); + put_cpu(); +} + +bool handle_guest_split_lock(unsigned long ip) +{ + if (sld_state == sld_warn) { + split_lock_warn(ip); + return true; + } + + pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n", + current->comm, current->pid, + sld_state == sld_fatal ? "fatal" : "bogus", ip); + + current->thread.error_code = 0; + current->thread.trap_nr = X86_TRAP_AC; + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + return false; +} +EXPORT_SYMBOL_GPL(handle_guest_split_lock); + +void bus_lock_init(void) +{ + u64 val; + + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + return; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, val); + + if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) { + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + } else { + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + } + + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); +} + +bool handle_user_split_lock(struct pt_regs *regs, long error_code) +{ + if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) + return false; + split_lock_warn(regs->ip); + return true; +} + +void handle_bus_lock(struct pt_regs *regs) +{ + switch (sld_state) { + case sld_off: + break; + case sld_ratelimit: + /* Enforce no more than bld_ratelimit bus locks/sec. */ + while (!__ratelimit(&bld_ratelimit)) + msleep(20); + /* Warn on the bus lock. 
*/ + fallthrough; + case sld_warn: + pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", + current->comm, current->pid, regs->ip); + break; + case sld_fatal: + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + break; + } +} + +/* + * CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. + */ +static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), + {} +}; + +static void __init split_lock_setup(struct cpuinfo_x86 *c) +{ + const struct x86_cpu_id *m; + u64 ia32_core_caps; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; + + /* Check for CPUs that have support but do not enumerate it: */ + m = x86_match_cpu(split_lock_cpu_ids); + if (m) + goto supported; + + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) + return; + + /* + * Not all bits in MSR_IA32_CORE_CAPS are architectural, but + * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set + * it have split lock detection. 
+ */ + rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); + if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) + goto supported; + + /* CPU is not in the model list and does not have the MSR bit: */ + return; + +supported: + cpu_model_supports_sld = true; + __split_lock_setup(); +} + +static void sld_state_show(void) +{ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + return; + + switch (sld_state) { + case sld_off: + pr_info("disabled\n"); + break; + case sld_warn: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { + pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); + if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "x86/splitlock", NULL, splitlock_cpu_offline) < 0) + pr_warn("No splitlock CPU offline handler\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { + pr_info("#DB: warning on user-space bus_locks\n"); + } + break; + case sld_fatal: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { + pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { + pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", + boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? 
+ " from non-WB" : ""); + } + break; + case sld_ratelimit: + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); + break; + } +} + +void __init sld_setup(struct cpuinfo_x86 *c) +{ + split_lock_setup(c); + sld_state_setup(); + sld_state_show(); +} diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 059e9cecbf0c..23216a694391 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -7,13 +7,9 @@ #include #include #include -#include #include #include #include -#include -#include -#include #include #include @@ -24,8 +20,6 @@ #include #include #include -#include -#include #include #include #include @@ -41,28 +35,6 @@ #include #endif -enum split_lock_detect_state { - sld_off = 0, - sld_warn, - sld_fatal, - sld_ratelimit, -}; - -/* - * Default to sld_off because most systems do not support split lock detection. - * sld_state_setup() will switch this to sld_warn on systems that support - * split lock/bus lock detect, unless there is a command line override. - */ -static enum split_lock_detect_state sld_state __ro_after_init = sld_off; -static u64 msr_test_ctrl_cache __ro_after_init; - -/* - * With a name like MSR_TEST_CTL it should go without saying, but don't touch - * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it - * on CPUs that do not support SLD can cause fireworks, even when writing '0'. - */ -static bool cpu_model_supports_sld __ro_after_init; - /* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. 
However, there exists @@ -779,9 +751,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); } -static void split_lock_init(void); -static void bus_lock_init(void); - static void init_intel(struct cpuinfo_x86 *c) { early_init_intel(c); @@ -1165,382 +1134,6 @@ static const struct cpu_dev intel_cpu_dev = { cpu_dev_register(intel_cpu_dev); -#undef pr_fmt -#define pr_fmt(fmt) "x86/split lock detection: " fmt - -static const struct { - const char *option; - enum split_lock_detect_state state; -} sld_options[] __initconst = { - { "off", sld_off }, - { "warn", sld_warn }, - { "fatal", sld_fatal }, - { "ratelimit:", sld_ratelimit }, -}; - -static struct ratelimit_state bld_ratelimit; - -static unsigned int sysctl_sld_mitigate = 1; -static DEFINE_SEMAPHORE(buslock_sem, 1); - -#ifdef CONFIG_PROC_SYSCTL -static struct ctl_table sld_sysctls[] = { - { - .procname = "split_lock_mitigate", - .data = &sysctl_sld_mitigate, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_douintvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - {} -}; - -static int __init sld_mitigate_sysctl_init(void) -{ - register_sysctl_init("kernel", sld_sysctls); - return 0; -} - -late_initcall(sld_mitigate_sysctl_init); -#endif - -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt), ratelimit; - - if (strncmp(arg, opt, len)) - return false; - - /* - * Min ratelimit is 1 bus lock/sec. - * Max ratelimit is 1000 bus locks/sec. 
- */ - if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && - ratelimit > 0 && ratelimit <= 1000) { - ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); - ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); - return true; - } - - return len == arglen; -} - -static bool split_lock_verify_msr(bool on) -{ - u64 ctrl, tmp; - - if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl)) - return false; - if (on) - ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; - else - ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT; - if (wrmsrl_safe(MSR_TEST_CTRL, ctrl)) - return false; - rdmsrl(MSR_TEST_CTRL, tmp); - return ctrl == tmp; -} - -static void __init sld_state_setup(void) -{ - enum split_lock_detect_state state = sld_warn; - char arg[20]; - int i, ret; - - if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && - !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) - return; - - ret = cmdline_find_option(boot_command_line, "split_lock_detect", - arg, sizeof(arg)); - if (ret >= 0) { - for (i = 0; i < ARRAY_SIZE(sld_options); i++) { - if (match_option(arg, ret, sld_options[i].option)) { - state = sld_options[i].state; - break; - } - } - } - sld_state = state; -} - -static void __init __split_lock_setup(void) -{ - if (!split_lock_verify_msr(false)) { - pr_info("MSR access failed: Disabled\n"); - return; - } - - rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); - - if (!split_lock_verify_msr(true)) { - pr_info("MSR access failed: Disabled\n"); - return; - } - - /* Restore the MSR to its cached value. */ - wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); - - setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); -} - -/* - * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking - * is not implemented as one thread could undo the setting of the other - * thread immediately after dropping the lock anyway. 
- */ -static void sld_update_msr(bool on) -{ - u64 test_ctrl_val = msr_test_ctrl_cache; - - if (on) - test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; - - wrmsrl(MSR_TEST_CTRL, test_ctrl_val); -} - -static void split_lock_init(void) -{ - /* - * #DB for bus lock handles ratelimit and #AC for split lock is - * disabled. - */ - if (sld_state == sld_ratelimit) { - split_lock_verify_msr(false); - return; - } - - if (cpu_model_supports_sld) - split_lock_verify_msr(sld_state != sld_off); -} - -static void __split_lock_reenable_unlock(struct work_struct *work) -{ - sld_update_msr(true); - up(&buslock_sem); -} - -static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock); - -static void __split_lock_reenable(struct work_struct *work) -{ - sld_update_msr(true); -} -static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); - -/* - * If a CPU goes offline with pending delayed work to re-enable split lock - * detection then the delayed work will be executed on some other CPU. That - * handles releasing the buslock_sem, but because it executes on a - * different CPU probably won't re-enable split lock detection. This is a - * problem on HT systems since the sibling CPU on the same core may then be - * left running with split lock detection disabled. - * - * Unconditionally re-enable detection here. - */ -static int splitlock_cpu_offline(unsigned int cpu) -{ - sld_update_msr(true); - - return 0; -} - -static void split_lock_warn(unsigned long ip) -{ - struct delayed_work *work; - int cpu; - - if (!current->reported_split_lock) - pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", - current->comm, current->pid, ip); - current->reported_split_lock = 1; - - if (sysctl_sld_mitigate) { - /* - * misery factor #1: - * sleep 10ms before trying to execute split lock. - */ - if (msleep_interruptible(10) > 0) - return; - /* - * Misery factor #2: - * only allow one buslocked disabled core at a time. 
- */ - if (down_interruptible(&buslock_sem) == -EINTR) - return; - work = &sl_reenable_unlock; - } else { - work = &sl_reenable; - } - - cpu = get_cpu(); - schedule_delayed_work_on(cpu, work, 2); - - /* Disable split lock detection on this CPU to make progress */ - sld_update_msr(false); - put_cpu(); -} - -bool handle_guest_split_lock(unsigned long ip) -{ - if (sld_state == sld_warn) { - split_lock_warn(ip); - return true; - } - - pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n", - current->comm, current->pid, - sld_state == sld_fatal ? "fatal" : "bogus", ip); - - current->thread.error_code = 0; - current->thread.trap_nr = X86_TRAP_AC; - force_sig_fault(SIGBUS, BUS_ADRALN, NULL); - return false; -} -EXPORT_SYMBOL_GPL(handle_guest_split_lock); - -static void bus_lock_init(void) -{ - u64 val; - - if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) - return; - - rdmsrl(MSR_IA32_DEBUGCTLMSR, val); - - if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && - (sld_state == sld_warn || sld_state == sld_fatal)) || - sld_state == sld_off) { - /* - * Warn and fatal are handled by #AC for split lock if #AC for - * split lock is supported. - */ - val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; - } else { - val |= DEBUGCTLMSR_BUS_LOCK_DETECT; - } - - wrmsrl(MSR_IA32_DEBUGCTLMSR, val); -} - -bool handle_user_split_lock(struct pt_regs *regs, long error_code) -{ - if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) - return false; - split_lock_warn(regs->ip); - return true; -} - -void handle_bus_lock(struct pt_regs *regs) -{ - switch (sld_state) { - case sld_off: - break; - case sld_ratelimit: - /* Enforce no more than bld_ratelimit bus locks/sec. */ - while (!__ratelimit(&bld_ratelimit)) - msleep(20); - /* Warn on the bus lock. 
*/ - fallthrough; - case sld_warn: - pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", - current->comm, current->pid, regs->ip); - break; - case sld_fatal: - force_sig_fault(SIGBUS, BUS_ADRALN, NULL); - break; - } -} - -/* - * CPU models that are known to have the per-core split-lock detection - * feature even though they do not enumerate IA32_CORE_CAPABILITIES. - */ -static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_VFM(INTEL_ICELAKE_X, 0), - X86_MATCH_VFM(INTEL_ICELAKE_L, 0), - X86_MATCH_VFM(INTEL_ICELAKE_D, 0), - {} -}; - -static void __init split_lock_setup(struct cpuinfo_x86 *c) -{ - const struct x86_cpu_id *m; - u64 ia32_core_caps; - - if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return; - - /* Check for CPUs that have support but do not enumerate it: */ - m = x86_match_cpu(split_lock_cpu_ids); - if (m) - goto supported; - - if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) - return; - - /* - * Not all bits in MSR_IA32_CORE_CAPS are architectural, but - * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set - * it have split lock detection. 
- */ - rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) - goto supported; - - /* CPU is not in the model list and does not have the MSR bit: */ - return; - -supported: - cpu_model_supports_sld = true; - __split_lock_setup(); -} - -static void sld_state_show(void) -{ - if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && - !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) - return; - - switch (sld_state) { - case sld_off: - pr_info("disabled\n"); - break; - case sld_warn: - if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { - pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); - if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "x86/splitlock", NULL, splitlock_cpu_offline) < 0) - pr_warn("No splitlock CPU offline handler\n"); - } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { - pr_info("#DB: warning on user-space bus_locks\n"); - } - break; - case sld_fatal: - if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { - pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); - } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { - pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", - boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? 
- " from non-WB" : ""); - } - break; - case sld_ratelimit: - if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) - pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); - break; - } -} - -void __init sld_setup(struct cpuinfo_x86 *c) -{ - split_lock_setup(c); - sld_state_setup(); - sld_state_show(); -} - #define X86_HYBRID_CPU_TYPE_ID_SHIFT 24 diff --git a/include/linux/sched.h b/include/linux/sched.h index 6aa0549775f5..8c9523f1a93a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -888,7 +888,7 @@ struct task_struct { #ifdef CONFIG_IOMMU_SVA KABI_FILL_HOLE(unsigned pasid_activated:1) #endif -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT unsigned reported_split_lock:1; #endif diff --git a/kernel/fork.c b/kernel/fork.c index b593ed7a3fb3..0168f080c28d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1021,7 +1021,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #endif -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT tsk->reported_split_lock = 0; #endif -- Gitee From b955639a75675440c2e68ea6109fd7b26780c860 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 8 Aug 2024 06:29:35 +0000 Subject: [PATCH 10/22] x86/bus_lock: Add support for AMD mainline inclusion from mainline-v6.13-rc1 commit 408eb7417a92c5354c7be34f7425b305dfe30ad9 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 408eb7417a92c5354c7be34f7425b305dfe30ad9 upstream Add Bus Lock Detect (called Bus Lock Trap in AMD docs) support for AMD platforms. Bus Lock Detect is enumerated with CPUID Fn0000_0007_ECX_x0 bit [24 / BUSLOCKTRAP]. It can be enabled through MSR_IA32_DEBUGCTLMSR. When enabled, hardware clears DR6[11] and raises a #DB exception on occurrence of Bus Lock if CPL > 0. More detail about the feature can be found in AMD APM[1]. [1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 
4.07 - June 2023, Vol 2, 13.1.3.6 Bus Lock Trap https://bugzilla.kernel.org/attachment.cgi?id=304653 [Backport Changes] 1. In the current backport, changes intended for Documentation/arch/x86/buslock.rst were instead applied to the equivalent file in location Documentation/x86/buslock.rst, since the upstream commit ff61f0791ce96 which moved the x86-related documentation to Documentation/arch/ has not been applied in the current source tree. Backporting commit ff61f0791ce96 introduces additional dependencies and unnecessary conflicts. So, the necessary changes to buslock.rst are applied at its existing location (Documentation/x86/buslock.rst) to maintain compatibility. Signed-off-by: Ravi Bangoria Signed-off-by: Thomas Gleixner Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/all/20240808062937.1149-3-ravi.bangoria@amd.com Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- Documentation/x86/buslock.rst | 3 ++- arch/x86/Kconfig | 2 +- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/intel.c | 1 - 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst index 159ff6ba830e..9c539323066f 100644 --- a/Documentation/x86/buslock.rst +++ b/Documentation/x86/buslock.rst @@ -26,7 +26,8 @@ Detection ========= Intel processors may support either or both of the following hardware -mechanisms to detect split locks and bus locks. +mechanisms to detect split locks and bus locks. Some AMD processors also +support bus lock detect.
#AC exception for split lock detection -------------------------------------- diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ad95ecf7ed54..3a33648536dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2518,7 +2518,7 @@ source "kernel/livepatch/Kconfig" config X86_BUS_LOCK_DETECT bool "Split Lock Detect and Bus Lock Detect support" - depends on CPU_SUP_INTEL + depends on CPU_SUP_INTEL || CPU_SUP_AMD default y help Enable Split Lock Detect and Bus Lock Detect functionalities. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5b8afa7258a..633e93490531 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1730,6 +1730,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_init) this_cpu->c_init(c); + bus_lock_init(); + /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 23216a694391..abaac0cac5a5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -868,7 +868,6 @@ static void init_intel(struct cpuinfo_x86 *c) tsx_disable(); split_lock_init(); - bus_lock_init(); intel_init_thermal(c); } -- Gitee From 13013b661e2513fd408d8b9be21db404c0a4615c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 8 Aug 2024 06:29:36 +0000 Subject: [PATCH 11/22] KVM: SVM: Don't advertise Bus Lock Detect to guest if SVM support is missing mainline inclusion from mainline-v6.11-rc7 commit 54950bfe2b69cdc06ef753872b5225e54eb73506 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 54950bfe2b69cdc06ef753872b5225e54eb73506 upstream If host supports Bus Lock Detect, KVM advertises it to guests even if SVM support is absent. Additionally, guest wouldn't be able to use it despite guest CPUID bit being set. Fix it by unconditionally clearing the feature bit in KVM cpu capability. 
Reported-by: Jim Mattson Closes: https://lore.kernel.org/r/CALMp9eRet6+v8Y1Q-i6mqPm4hUow_kJNhmVHfOV8tMfuSS=tVg@mail.gmail.com Fixes: 76ea438b4afc ("KVM: X86: Expose bus lock debug exception to guest") Cc: stable@vger.kernel.org Signed-off-by: Ravi Bangoria Reviewed-by: Jim Mattson Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240808062937.1149-4-ravi.bangoria@amd.com Signed-off-by: Sean Christopherson Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- arch/x86/kvm/svm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4933aa525229..1e72e3a9d2c8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -925,6 +925,9 @@ static __init void svm_set_cpu_caps(void) /* Enable INVPCID feature */ kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID); + + /* Don't advertise Bus Lock Detect to guest if SVM support is absent */ + kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT); } static __init int svm_hardware_setup(void) -- Gitee From 50b0910503603585d58ae568f077e66588c6a675 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:06 -0800 Subject: [PATCH 12/22] KVM: SVM: Drop DEBUGCTL[5:2] from guest's effective value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.14-rc6 commit ee89e8013383d50a27ea9bf3c8a69eed6799856f bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit ee89e8013383d50a27ea9bf3c8a69eed6799856f upstream Drop bits 5:2 from the guest's effective DEBUGCTL value, as AMD changed the architectural behavior of the bits and broke backwards compatibility. On CPUs without BusLockTrap (or at least, in APMs from before ~2023), bits 5:2 controlled the behavior of external pins: Performance-Monitoring/Breakpoint Pin-Control (PBi)—Bits 5:2, read/write. 
Software uses these bits to control the type of information reported by the four external performance-monitoring/breakpoint pins on the processor. When a PBi bit is cleared to 0, the corresponding external pin (BPi) reports performance-monitor information. When a PBi bit is set to 1, the corresponding external pin (BPi) reports breakpoint information. With the introduction of BusLockTrap, presumably to be compatible with Intel CPUs, AMD redefined bit 2 to be BLCKDB: Bus Lock #DB Trap (BLCKDB)—Bit 2, read/write. Software sets this bit to enable generation of a #DB trap following successful execution of a bus lock when CPL is > 0. and redefined bits 5:3 (and bit 6) as "6:3 Reserved MBZ". Ideally, KVM would treat bits 5:2 as reserved. Defer that change to a feature cleanup to avoid breaking existing guests in LTS kernels. For now, drop the bits to retain backwards compatibility (of a sort). Note, dropping bits 5:2 is still a guest-visible change, e.g. if the guest is enabling LBRs *and* the legacy PBi bits, then the state of the PBi bits is visible to the guest, whereas now the guest will always see '0'. [Backport Changes] 1. In openEuler source code, file arch/x86/kvm/svm/svm.h, the macro DEBUGCTL_RESERVED_BITS was migrated from svm.c with updated changes, similar to Linux upstream, which moved this definition from arch/x86/kvm/svm/svm.c to arch/x86/kvm/svm/svm.h in commit d20c796ca37098. Backporting Linux upstream commit d20c796ca37098 would introduce additional dependencies and unnecessary conflicts. Therefore, to avoid such conflicts, the macro DEBUGCTL_RESERVED_BITS has been migrated directly with necessary adjustments.
Reported-by: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- arch/x86/kvm/svm/svm.c | 13 ++++++++++++- arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1e72e3a9d2c8..c6f526ae6b9b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -71,7 +71,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); #define SVM_FEATURE_DECODE_ASSIST (1 << 7) #define SVM_FEATURE_PAUSE_FILTER (1 << 10) -#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) #define TSC_RATIO_RSVD 0xffffff0000000000ULL #define TSC_RATIO_MIN 0x0000000000000001ULL @@ -2777,6 +2776,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) __func__, data); break; } + + /* + * AMD changed the architectural behavior of bits 5:2. On CPUs + * without BusLockTrap, bits 5:2 control "external pins", but + * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap + * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed + * the guest to set bits 5:2 despite not actually virtualizing + * Performance-Monitoring/Breakpoint external pins. Drop bits + * 5:2 for backwards compatibility. 
+ */ + data &= ~GENMASK(5, 2); + if (data & DEBUGCTL_RESERVED_BITS) return 1; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 866429e579fa..7f0955e9f372 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -350,6 +350,7 @@ static inline bool gif_set(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU +#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); -- Gitee From 5ecc82f67ce94bf32207eb2f8a5938685fc3a95b Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 14 Sep 2021 18:48:19 +0300 Subject: [PATCH 13/22] KVM: x86: SVM: add module param to control LBR virtualization mainline inclusion from mainline-v5.16-rc1 commit 4c84926e229e0efdafa2756d7e6c4ae2fb0b7945 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 4c84926e229e0efdafa2756d7e6c4ae2fb0b7945 upstream This is useful for debug and also makes it consistent with the rest of the SVM optional features. 
Signed-off-by: Maxim Levitsky Message-Id: <20210914154825.104886-9-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- arch/x86/kvm/svm/svm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c6f526ae6b9b..f91fb63fc247 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -190,6 +190,10 @@ module_param(vgif, int, 0444); /* enable/disable SEV support */ static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +/* enable/disable LBR virtualization */ +static int lbrv = true; +module_param(lbrv, int, 0444); + static bool __read_mostly dump_invalid_vmcb = 0; module_param(dump_invalid_vmcb, bool, 0644); @@ -1041,6 +1045,13 @@ static __init int svm_hardware_setup(void) pr_info("Virtual GIF supported\n"); } + if (lbrv) { + if (!boot_cpu_has(X86_FEATURE_LBRV)) + lbrv = false; + else + pr_info("LBR virtualization supported\n"); + } + svm_set_cpu_caps(); /* @@ -2771,7 +2782,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) wrmsrl(MSR_TSC_AUX, svm->tsc_aux); break; case MSR_IA32_DEBUGCTLMSR: - if (!boot_cpu_has(X86_FEATURE_LBRV)) { + if (!lbrv) { vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", __func__, data); break; -- Gitee From a1585c59d300a55eee761d904357454631724d8b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 24 Jan 2023 23:49:01 +0000 Subject: [PATCH 14/22] KVM: x86/pmu: Gate all "unimplemented MSR" prints on report_ignored_msrs mainline inclusion from mainline-v6.3-rc1 commit e76ae52747a82a548742107b4100e90da41a624d bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit e76ae52747a82a548742107b4100e90da41a624d upstream Add helpers to print unimplemented MSR accesses and condition all such prints on report_ignored_msrs, i.e. 
honor userspace's request to not print unimplemented MSRs. Even though vcpu_unimpl() is ratelimited, printing can still be problematic, e.g. if a print gets stalled when host userspace is writing MSRs during live migration, an effective stall can result in very noticeable disruption in the guest. E.g. the profile below was taken while calling KVM_SET_MSRS on the PMU counters while the PMU was disabled in KVM. - 99.75% 0.00% [.] __ioctl - __ioctl - 99.74% entry_SYSCALL_64_after_hwframe do_syscall_64 sys_ioctl - do_vfs_ioctl - 92.48% kvm_vcpu_ioctl - kvm_arch_vcpu_ioctl - 85.12% kvm_set_msr_ignored_check svm_set_msr kvm_set_msr_common printk vprintk_func vprintk_default vprintk_emit console_unlock call_console_drivers univ8250_console_write serial8250_console_write uart_console_write Reported-by: Aaron Lewis Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20230124234905.3774678-3-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Rahul Kumar Signed-off-by: mohanasv2 --- arch/x86/kvm/hyperv.c | 10 ++++------ arch/x86/kvm/svm/svm.c | 5 ++--- arch/x86/kvm/vmx/vmx.c | 4 +--- arch/x86/kvm/x86.c | 18 +++++------------- arch/x86/kvm/x86.h | 12 ++++++++++++ 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index e097faf12c82..b0a52e61f4cf 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1205,8 +1205,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, case HV_X64_MSR_SYNDBG_CONTROL ... 
HV_X64_MSR_SYNDBG_PENDING_BUFFER: return syndbg_set_msr(vcpu, msr, data, host); default: - vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", - msr, data); + kvm_pr_unimpl_wrmsr(vcpu, msr, data); return 1; } return 0; @@ -1325,8 +1324,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; break; default: - vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", - msr, data); + kvm_pr_unimpl_wrmsr(vcpu, msr, data); return 1; } @@ -1375,7 +1373,7 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: return syndbg_get_msr(vcpu, msr, pdata, host); default: - vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + kvm_pr_unimpl_rdmsr(vcpu, msr); return 1; } @@ -1437,7 +1435,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, data = APIC_BUS_FREQUENCY; break; default: - vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + kvm_pr_unimpl_rdmsr(vcpu, msr); return 1; } *pdata = data; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f91fb63fc247..aee78273cde7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2783,8 +2783,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_DEBUGCTLMSR: if (!lbrv) { - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", - __func__, data); + kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; } @@ -2824,7 +2823,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_CR: return svm_set_vm_cr(vcpu, data); case MSR_VM_IGNNE: - vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); + kvm_pr_unimpl_wrmsr(vcpu, ecx, data); break; case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 695417dd5fcc..f7d3c28291dc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ 
-2197,9 +2197,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_DEBUGCTLMSR: { u64 invalid = data & ~vcpu_supported_debugctl(vcpu); if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { - if (report_ignored_msrs) - vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n", - __func__, data); + kvm_pr_unimpl_wrmsr(vcpu, msr_index, data); data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d66aa96a857..8e6d15ec277c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3358,7 +3358,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu) int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - bool pr = false; u32 msr = msr_info->index; u64 data = msr_info->data; @@ -3407,15 +3406,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data == BIT_ULL(18)) { vcpu->arch.msr_hwcr = data; } else if (data != 0) { - vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", - data); + kvm_pr_unimpl_wrmsr(vcpu, msr, data); return 1; } break; case MSR_FAM10H_MMIO_CONF_BASE: if (data != 0) { - vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " - "0x%llx\n", data); + kvm_pr_unimpl_wrmsr(vcpu, msr, data); return 1; } break; @@ -3578,16 +3575,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: - pr = true; - fallthrough; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: case MSR_P6_EVNTSEL0 ... 
MSR_P6_EVNTSEL1: if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); - if (pr || data != 0) - vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " - "0x%x data 0x%llx\n", msr, data); + if (data) + kvm_pr_unimpl_wrmsr(vcpu, msr, data); break; case MSR_K7_CLK_CTL: /* @@ -3614,9 +3608,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - if (report_ignored_msrs) - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", - msr, data); + kvm_pr_unimpl_wrmsr(vcpu, msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d6833a43d7ac..1bc59111b4e7 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -285,6 +285,18 @@ extern bool report_ignored_msrs; extern bool eager_page_split; +static inline void kvm_pr_unimpl_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "Unhandled WRMSR(0x%x) = 0x%llx\n", msr, data); +} + +static inline void kvm_pr_unimpl_rdmsr(struct kvm_vcpu *vcpu, u32 msr) +{ + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "Unhandled RDMSR(0x%x)\n", msr); +} + static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) { return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, -- Gitee From 63f5646cbc861e29738507cb04fb05189ffb88cd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:07 -0800 Subject: [PATCH 15/22] KVM: SVM: Suppress DEBUGCTL.BTF on AMD mainline inclusion from mainline-v6.14-rc6 commit d0eac42f5cecce009d315655bee341304fbe075e bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit d0eac42f5cecce009d315655bee341304fbe075e upstream Mark BTF as reserved in DEBUGCTL on AMD, as KVM doesn't actually support BTF, and fully enabling BTF virtualization is non-trivial due to interactions with the 
emulator, guest_debug, #DB interception, nested SVM, etc. Don't inject #GP if the guest attempts to set BTF, as there's no way to communicate lack of support to the guest, and instead suppress the flag and treat the WRMSR as (partially) unsupported. In short, make KVM behave the same on AMD and Intel (VMX already squashes BTF). Note, due to other bugs in KVM's handling of DEBUGCTL, the only way BTF has "worked" in any capacity is if the guest simultaneously enables LBRs. Reported-by: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-3-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Mukesh-Ogare Signed-off-by: mohanasv2 --- arch/x86/kvm/svm/svm.c | 9 +++++++++ arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index aee78273cde7..3c6b0d0db9bd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2798,6 +2798,15 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) */ data &= ~GENMASK(5, 2); + /* + * Suppress BTF as KVM doesn't virtualize BTF, but there's no + * way to communicate lack of support to the guest. 
+ */ + if (data & DEBUGCTLMSR_BTF) { + kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data); + data &= ~DEBUGCTLMSR_BTF; + } + if (data & DEBUGCTL_RESERVED_BITS) return 1; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7f0955e9f372..15f0b8c0ebde 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -350,7 +350,7 @@ static inline bool gif_set(struct vcpu_svm *svm) /* svm.c */ #define MSR_INVALID 0xffffffffU -#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) +#define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR) u32 svm_msrpm_offset(u32 msr); u32 *svm_vcpu_alloc_msrpm(void); void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); -- Gitee From c93da04145172e10031095e3715a8b65eb5893f4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:08 -0800 Subject: [PATCH 16/22] KVM: x86: Snapshot the host's DEBUGCTL in common x86 mainline inclusion from mainline-v6.14-rc6 commit fb71c795935652fa20eaf9517ca9547f5af99a76 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit fb71c795935652fa20eaf9517ca9547f5af99a76 upstream Move KVM's snapshot of DEBUGCTL to kvm_vcpu_arch and take the snapshot in common x86, so that SVM can also use the snapshot. Opportunistically change the field to a u64. While bits 63:32 are reserved on AMD, not mentioned at all in Intel's SDM, and managed as an "unsigned long" by the kernel, DEBUGCTL is an MSR and therefore a 64-bit value. 
Reviewed-by: Xiaoyao Li Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-4-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Mukesh-Ogare Signed-off-by: mohanasv2 --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 8 ++------ arch/x86/kvm/vmx/vmx.h | 2 -- arch/x86/kvm/x86.c | 1 + 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 728c1b6c02b8..fdbcf71759bf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -593,6 +593,7 @@ struct kvm_vcpu_arch { u32 pkru; u32 hflags; u64 efer; + u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool apicv_active; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f7d3c28291dc..af164e6eb385 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1497,13 +1497,9 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); - - vmx->host_debugctlmsr = get_debugctlmsr(); } static void vmx_vcpu_put(struct kvm_vcpu *vcpu) @@ -7248,8 +7244,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. 
Restore it if needed */ - if (vmx->host_debugctlmsr) - update_debugctlmsr(vmx->host_debugctlmsr); + if (vcpu->arch.host_debugctl) + update_debugctlmsr(vcpu->arch.host_debugctl); #ifndef CONFIG_X86_64 /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a9b101834f10..fa9cacbc3632 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -317,8 +317,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - unsigned long host_debugctlmsr; - /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8e6d15ec277c..ea91bbfd80dc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4405,6 +4405,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Save host pkru register if supported */ vcpu->arch.host_pkru = read_pkru(); + vcpu->arch.host_debugctl = get_debugctlmsr(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { -- Gitee From 6f4fa4adffa7b4c2acd537a455776df6f9c52ba5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:09 -0800 Subject: [PATCH 17/22] KVM: SVM: Manually context switch DEBUGCTL if LBR virtualization is disabled mainline inclusion from mainline-v6.14-rc6 commit 433265870ab3455b418885bff48fa5fd02f7e448 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 433265870ab3455b418885bff48fa5fd02f7e448 upstream Manually load the guest's DEBUGCTL prior to VMRUN (and restore the host's value on #VMEXIT) if it diverges from the host's value and LBR virtualization is disabled, as hardware only context switches DEBUGCTL if LBR virtualization is fully enabled. Running the guest with the host's value has likely been mildly problematic for quite some time, e.g. 
it will result in undesirable behavior if BTF diverges (with the caveat that KVM now suppresses guest BTF due to lack of support). But the bug became fatal with the introduction of Bus Lock Trap ("Detect" in kernel parlance) support for AMD (commit 408eb7417a92 ("x86/bus_lock: Add support for AMD")), as a bus lock in the guest will trigger an unexpected #DB. Note, suppressing the bus lock #DB, i.e. simply resuming the guest without injecting a #DB, is not an option. It wouldn't address the general issue with DEBUGCTL, e.g. for things like BTF, and there are other guest-visible side effects if BusLockTrap is left enabled. If BusLockTrap is disabled, then DR6.BLD is reserved-to-1; any attempts to clear it by software are ignored. But if BusLockTrap is enabled, software can clear DR6.BLD: Software enables bus lock trap by setting DebugCtl MSR[BLCKDB] (bit 2) to 1. When bus lock trap is enabled, ... The processor indicates that this #DB was caused by a bus lock by clearing DR6[BLD] (bit 11). DR6[11] previously had been defined to be always 1. and clearing DR6.BLD is "sticky" in that it's not set (i.e. lowered) by other #DBs: All other #DB exceptions leave DR6[BLD] unmodified E.g. leaving BusLockTrap enabled can confuse a legacy guest that writes '0' to reset DR6.
Reported-by: rangemachine@gmail.com Reported-by: whanos@sergal.fun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219787 Closes: https://lore.kernel.org/all/bug-219787-28872@https.bugzilla.kernel.org%2F Cc: Ravi Bangoria Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-5-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Mukesh-Ogare Signed-off-by: mohanasv2 --- arch/x86/kvm/svm/svm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3c6b0d0db9bd..a0b01a88f99e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3667,6 +3667,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); kvm_load_guest_xsave_state(vcpu); + /* + * Hardware only context switches DEBUGCTL if LBR virtualization is + * enabled. Manually load DEBUGCTL if necessary (and restore it after + * VM-Exit), as running with the host's DEBUGCTL can negatively affect + * guest state and can even be fatal, e.g. due to Bus Lock Detect. 
+ */ + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(svm->vmcb->save.dbgctl); + kvm_wait_lapic_expire(vcpu); /* @@ -3709,6 +3719,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_interrupt(&svm->vcpu); + if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) && + vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl) + update_debugctlmsr(vcpu->arch.host_debugctl); + kvm_load_host_xsave_state(vcpu); stgi(); -- Gitee From acb2cd743c5dc05bdb455cde02dbe47d1fe114a4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:10 -0800 Subject: [PATCH 18/22] KVM: x86: Snapshot the host's DEBUGCTL after disabling IRQs mainline inclusion from mainline-v6.14-rc6 commit 189ecdb3e112da703ac0699f4ec76aa78122f911 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 189ecdb3e112da703ac0699f4ec76aa78122f911 upstream Snapshot the host's DEBUGCTL after disabling IRQs, as perf can toggle debugctl bits from IRQ context, e.g. when enabling/disabling events via smp_call_function_single(). Taking the snapshot (long) before IRQs are disabled could result in KVM effectively clobbering DEBUGCTL due to using a stale snapshot. 
Cc: stable@vger.kernel.org Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-6-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Mukesh-Ogare Signed-off-by: mohanasv2 --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea91bbfd80dc..15caf65c8973 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4405,7 +4405,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Save host pkru register if supported */ vcpu->arch.host_pkru = read_pkru(); - vcpu->arch.host_debugctl = get_debugctlmsr(); /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { @@ -9706,6 +9705,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } + vcpu->arch.host_debugctl = get_debugctlmsr(); + exit_fastpath = kvm_x86_ops.run(vcpu); /* * Do this here before restoring debug registers on the host. And -- Gitee From 79f506ae6aff10813fbbcdb2f41d31d9e216a6c2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 27 Feb 2025 14:24:11 -0800 Subject: [PATCH 19/22] KVM: SVM: Treat DEBUGCTL[5:2] as reserved mainline inclusion from mainline-v6.16-rc1 commit 5ecdb48dd9188c355ddde5c3686c0835a223ca21 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 5ecdb48dd9188c355ddde5c3686c0835a223ca21 upstream Stop ignoring DEBUGCTL[5:2] on AMD CPUs and instead treat them as reserved. KVM has never properly virtualized AMD's legacy PBi bits, but did allow the guest (and host userspace) to set the bits. To avoid breaking guests when running on CPUs with BusLockTrap, which redefined bit 2 to BLCKDB and made bits 5:3 reserved, a previous KVM change ignored bits 5:3, e.g. so that legacy guest software wouldn't inadvertently enable BusLockTrap or hit a VMRUN failure due to setting reserved.
To allow for virtualizing BusLockTrap and whatever future features may use bits 5:3, treat bits 5:2 as reserved (and hope that doing so doesn't break any existing guests). Reviewed-and-tested-by: Ravi Bangoria Link: https://lore.kernel.org/r/20250227222411.3490595-7-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Mukesh-Ogare Signed-off-by: mohanasv2 --- arch/x86/kvm/svm/svm.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a0b01a88f99e..6598aea0d911 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2787,17 +2787,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; } - /* - * AMD changed the architectural behavior of bits 5:2. On CPUs - * without BusLockTrap, bits 5:2 control "external pins", but - * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap - * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed - * the guest to set bits 5:2 despite not actually virtualizing - * Performance-Monitoring/Breakpoint external pins. Drop bits - * 5:2 for backwards compatibility. - */ - data &= ~GENMASK(5, 2); - /* * Suppress BTF as KVM doesn't virtualize BTF, but there's no * way to communicate lack of support to the guest. -- Gitee From 6db65a5fded8b39c3aee382dd6502c7bb02fc5ba Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 15 Mar 2023 15:57:22 -0700 Subject: [PATCH 20/22] Documentation/x86: Update split lock documentation mainline inclusion from mainline-v6.4-rc1 commit 054ed6349c1bdc5f4575aea05a1d37ee45ec5fb7 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 054ed6349c1bdc5f4575aea05a1d37ee45ec5fb7 upstream commit b041b525dab9 ("x86/split_lock: Make life miserable for split lockers") added a delay and serialization of split locks. Commit 727209376f49 ("x86/split_lock: Add sysctl to control the misery mode") provided a sysctl to turn off the misery. 
Update the split lock documentation to describe the current state of the code. Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20230315225722.104607-1-tony.luck@intel.com Signed-off-by: Jonathan Corbet Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- Documentation/x86/buslock.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst index 9c539323066f..f15195ff0a11 100644 --- a/Documentation/x86/buslock.rst +++ b/Documentation/x86/buslock.rst @@ -54,8 +54,14 @@ parameter "split_lock_detect". Here is a summary of different options: |off |Do nothing |Do nothing | +------------------+----------------------------+-----------------------+ |warn |Kernel OOPs |Warn once per task and | -|(default) |Warn once per task and |and continues to run. | -| |disable future checking | | +|(default) |Warn once per task, add a |and continues to run. | +| |delay, add synchronization | | +| |to prevent more than one | | +| |core from executing a | | +| |split lock in parallel. | | +| |sysctl split_lock_mitigate | | +| |can be used to avoid the | | +| |delay and synchronization | | | |When both features are | | | |supported, warn in #AC | | +------------------+----------------------------+-----------------------+ -- Gitee From db948edb557e0ecbc8293865d88be4503154740d Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 29 May 2024 19:36:05 +0100 Subject: [PATCH 21/22] x86/cpu/intel: Drop stray FAM6 check with new Intel CPU model defines mainline inclusion from mainline-v6.11-rc1 commit 34b3fc558b537bdf99644dcde539e151716f6331 bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit 34b3fc558b537bdf99644dcde539e151716f6331 upstream The outer if () should have been dropped when switching to c->x86_vfm. 
Fixes: 6568fc18c2f6 ("x86/cpu/intel: Switch to new Intel CPU model defines") Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Acked-by: Tony Luck Link: https://lore.kernel.org/r/20240529183605.17520-1-andrew.cooper3@citrix.com Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- arch/x86/kernel/cpu/intel.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index abaac0cac5a5..7deb3749804a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -404,17 +404,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ - if (c->x86 == 6) { - switch (c->x86_vfm) { - case INTEL_ATOM_SALTWELL_MID: - case INTEL_ATOM_SALTWELL_TABLET: - case INTEL_ATOM_SILVERMONT_MID: - case INTEL_ATOM_AIRMONT_NP: - set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); - break; - default: - break; - } + switch (c->x86_vfm) { + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT_NP: + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); + break; } /* -- Gitee From ab1cd17d57c381e915682cddf0b15e488d3e57b4 Mon Sep 17 00:00:00 2001 From: Maksim Davydov Date: Wed, 15 Jan 2025 16:17:04 +0300 Subject: [PATCH 22/22] x86/split_lock: Fix the delayed detection logic mainline inclusion from mainline-v6.15-rc1 commit c929d08df8bee855528b9d15b853c892c54e1eee bugzilla: https://gitee.com/openeuler/kernel/issues/ICSPRX ---------------------------------------- commit c929d08df8bee855528b9d15b853c892c54e1eee upstream If the warning mode with disabled mitigation mode is used, then on each CPU where the split lock occurred detection will be disabled in order to make progress and delayed work will be scheduled, which then will enable detection back. Now it turns out that all CPUs use one global delayed work structure. 
This leads to the fact that if a split lock occurs on several CPUs at the same time (within 2 jiffies), only one CPU will schedule delayed work, but the rest will not. The return value of schedule_delayed_work_on() would have shown this, but it is not checked in the code. A diagram that can help to understand the bug reproduction: - sld_update_msr() enables/disables SLD on both CPUs on the same core - schedule_delayed_work_on() internally checks WORK_STRUCT_PENDING_BIT. If a work has the 'pending' status, then schedule_delayed_work_on() will return an error code and, most importantly, the work will not be placed in the workqueue. Let's say we have a multicore system on which split_lock_mitigate=0 and a multithreaded application is running that calls splitlock in multiple threads. Due to the fact that sld_update_msr() affects the entire core (both CPUs), we will consider 2 CPUs from different cores. Let the 2 threads of this application schedule to CPU0 (core 0) and to CPU 2 (core 1), then: | || | | CPU 0 (core 0) || CPU 2 (core 1) | |_________________________________||___________________________________| | || | | 1) SPLIT LOCK occurred || | | || | | 2) split_lock_warn() || | | || | | 3) sysctl_sld_mitigate == 0 || | | (work = &sl_reenable) || | | || | | 4) schedule_delayed_work_on() || | | (reenable will be called || | | after 2 jiffies on CPU 0) || | | || | | 5) disable SLD for core 0 || | | || | | ------------------------- || | | || | | || 6) SPLIT LOCK occurred | | || | | || 7) split_lock_warn() | | || | | || 8) sysctl_sld_mitigate == 0 | | || (work = &sl_reenable, | | || the same address as in 3) ) | | || | | 2 jiffies || 9) schedule_delayed_work_on() | | || fails because the work is in | | || the pending state since 4). | | || The work wasn't placed to the | | || workqueue.
reenable won't be | | || called on CPU 2 | | || | | || 10) disable SLD for core 1 | | || | | || From now on SLD will | | || never be reenabled on core 1 | | || | | ------------------------- || | | || | | 11) enable SLD for core 0 by || | | __split_lock_reenable || | | || | If the application threads can be scheduled to all processor cores, then over time there will be only one core left, on which SLD will be enabled and split lock will be able to be detected; and on all other cores SLD will be disabled all the time. Most likely, this bug has not been noticed for so long because sysctl_sld_mitigate default value is 1, and in this case a semaphore is used that does not allow 2 different cores to have SLD disabled at the same time, that is, strictly only one work is placed in the workqueue. In order to fix the warning mode with disabled mitigation mode, delayed work has to be per-CPU. Implement it. Fixes: 727209376f49 ("x86/split_lock: Add sysctl to control the misery mode") Signed-off-by: Maksim Davydov Signed-off-by: Ingo Molnar Tested-by: Guilherme G. Piccoli Cc: Thomas Gleixner Cc: Ravi Bangoria Cc: Tom Lendacky Link: https://lore.kernel.org/r/20250115131704.132609-1-davydov-max@yandex-team.ru Signed-off-by: Gokul K Signed-off-by: mohanasv2 --- arch/x86/kernel/cpu/bus_lock.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c index 7b8b0f595d1b..48a625c0b57a 100644 --- a/arch/x86/kernel/cpu/bus_lock.c +++ b/arch/x86/kernel/cpu/bus_lock.c @@ -193,7 +193,13 @@ static void __split_lock_reenable(struct work_struct *work) { sld_update_msr(true); } -static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); +/* + * In order for each CPU to schedule its delayed work independently of the + * others, delayed work struct must be per-CPU.
This is not required when + * sysctl_sld_mitigate is enabled because of the semaphore that limits + * the number of simultaneously scheduled delayed works to 1. + */ +static DEFINE_PER_CPU(struct delayed_work, sl_reenable); /* * If a CPU goes offline with pending delayed work to re-enable split lock @@ -214,7 +220,7 @@ static int splitlock_cpu_offline(unsigned int cpu) static void split_lock_warn(unsigned long ip) { - struct delayed_work *work; + struct delayed_work *work = NULL; int cpu; if (!current->reported_split_lock) @@ -236,11 +242,17 @@ static void split_lock_warn(unsigned long ip) if (down_interruptible(&buslock_sem) == -EINTR) return; work = &sl_reenable_unlock; - } else { - work = &sl_reenable; } cpu = get_cpu(); + + if (!work) { + work = this_cpu_ptr(&sl_reenable); + /* Deferred initialization of per-CPU struct */ + if (!work->work.func) + INIT_DELAYED_WORK(work, __split_lock_reenable); + } + schedule_delayed_work_on(cpu, work, 2); /* Disable split lock detection on this CPU to make progress */ -- Gitee