From d78aa4d9b77b77832dbe9fb1f03ac0054d5eaca8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 22 Mar 2021 13:53:23 +0000 Subject: [PATCH 1/7] x86/cpufeatures: Enumerate #DB for bus lock detection mainline inclusion from mainline-v5.12-rc4 commit f21d4d3b97a8603567e5d4250bd75e8ebbd520af category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=f21d4d3b97a8603567e5d4250bd75e8ebbd520af Intel-SIG: commit f21d4d3 x86/cpufeatures: Enumerate #DB for bus lock detection -------------------------------- A bus lock is acquired through either a split locked access to writeback (WB) memory or any locked access to non-WB memory. This is typically >1000 cycles slower than an atomic operation within a cache line. It also disrupts performance on other cores. Some CPUs have the ability to notify the kernel by a #DB trap after a user instruction acquires a bus lock and is executed. This allows the kernel to enforce user application throttling or mitigation. Both breakpoint and bus lock can trigger the #DB trap in the same instruction and the ordering of handling them is the kernel #DB handler's choice. The CPU feature flag to be shown in /proc/cpuinfo will be "bus_lock_detect". Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20210322135325.682257-2-fenghua.yu@intel.com (cherry picked from commit f21d4d3b97a8603567e5d4250bd75e8ebbd520af) Signed-off-by: Ethan Zhao --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4da419226377..a0de1f6b8e62 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -353,6 +353,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -- Gitee From 3f9272839c21f5b6ff3e912c45fc1b0d605c7cd6 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 22 Mar 2021 13:53:24 +0000 Subject: [PATCH 2/7] x86/traps: Handle #DB for bus lock mainline inclusion from mainline-v5.12-rc4 commit ebb1064e7c2e90b56e4d40ab154ef9796060a1c3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=ebb1064e7c2e90b56e4d40ab154ef9796060a1c3 Intel-SIG: commit ebb1064 x86/traps: Handle #DB for bus lock -------------------------------- Bus locks degrade performance for the whole system, not just for the CPU that requested the bus lock. Two CPU features "#AC for split lock" and "#DB for bus lock" provide hooks so that the operating system may choose one of several mitigation strategies. bus lock feature to cover additional situations with new options to mitigate. split_lock_detect= #AC for split lock #DB for bus lock off Do nothing Do nothing warn Kernel OOPs Warn once per task and Warn once per task and and continues to run. disable future checking When both features are supported, warn in #AC fatal Kernel OOPs Send SIGBUS to user. Send SIGBUS to user When both features are supported, fatal in #AC ratelimit:N Do nothing Limit bus lock rate to N per second in the current non-root user. Default option is "warn". Hardware only generates #DB for bus lock detect when CPL>0 to avoid nested #DB from multiple bus locks while the first #DB is being handled. So no need to handle #DB for bus lock detected in the kernel. while #AC for split lock is enabled by split lock detection bit 29 in TEST_CTRL MSR. Both breakpoint and bus lock in the same instruction can trigger one #DB. The bus lock is handled before the breakpoint in the #DB handler. Delivery of #DB for bus lock in userspace clears DR6[11], which is set by the #DB handler right after reading DR6. Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20210322135325.682257-3-fenghua.yu@intel.com (cherry picked from commit ebb1064e7c2e90b56e4d40ab154ef9796060a1c3) Signed-off-by: Ethan Zhao --- arch/x86/include/asm/cpu.h | 7 +- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/uapi/asm/debugreg.h | 1 + arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 113 ++++++++++++++++++++++----- arch/x86/kernel/traps.c | 4 + 6 files changed, 106 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index da78ccbd493b..0d7fc0e2bfc9 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -41,12 +41,13 @@ unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); #ifdef CONFIG_CPU_SUP_INTEL -extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c); +extern void __init sld_setup(struct cpuinfo_x86 *c); extern void switch_to_sld(unsigned long tifn); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); extern bool handle_guest_split_lock(unsigned long ip); +extern void handle_bus_lock(struct pt_regs *regs); #else -static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {} +static inline void __init sld_setup(struct cpuinfo_x86 *c) {} static inline void switch_to_sld(unsigned long tifn) {} static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) { @@ -57,6 +58,8 @@ static inline bool handle_guest_split_lock(unsigned long ip) { return false; } + +static inline void handle_bus_lock(struct pt_regs *regs) {} #endif #ifdef CONFIG_IA32_FEAT_CTL void init_ia32_feat_ctl(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2b0af5eb5131..e2103a35b45a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -289,6 +289,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index d95d080b30e3..0007ba077c0c 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -24,6 +24,7 @@ #define DR_TRAP3 (0x8) /* db3 */ #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) +#define DR_BUS_LOCK (0x800) /* bus_lock */ #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4917c2698ac1..b06254f8643c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1374,7 +1374,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_set_bug_bits(c); - cpu_set_core_cap_bits(c); + sld_setup(c); fpu__init_system(c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 816fdbec795a..dfda4f42ec9b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -43,9 +43,9 @@ enum split_lock_detect_state { }; /* - * Default to sld_off because most systems do not support split lock detection - * split_lock_setup() will switch this to sld_warn on systems that support - * split lock detect, unless there is a command line override. + * Default to sld_off because most systems do not support split lock detection. + * sld_state_setup() will switch this to sld_warn on systems that support + * split lock/bus lock detect, unless there is a command line override. */ static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; @@ -602,6 +602,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) } static void split_lock_init(void); +static void bus_lock_init(void); static void init_intel(struct cpuinfo_x86 *c) { @@ -719,6 +720,9 @@ static void init_intel(struct cpuinfo_x86 *c) tsx_disable(); split_lock_init(); + bus_lock_init(); + + intel_init_thermal(c); } #ifdef CONFIG_X86_32 @@ -1017,16 +1021,15 @@ static bool split_lock_verify_msr(bool on) return ctrl == tmp; } -static void __init split_lock_setup(void) +static void __init sld_state_setup(void) { enum split_lock_detect_state state = sld_warn; char arg[20]; int i, ret; - if (!split_lock_verify_msr(false)) { - pr_info("MSR access failed: Disabled\n"); + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; - } ret = cmdline_find_option(boot_command_line, "split_lock_detect", arg, sizeof(arg)); @@ -1038,17 +1041,14 @@ static void __init split_lock_setup(void) } } } + sld_state = state; +} - switch (state) { - case sld_off: - pr_info("disabled\n"); +static void __init __split_lock_setup(void) +{ + if (!split_lock_verify_msr(false)) { + pr_info("MSR access failed: Disabled\n"); return; - case sld_warn: - pr_info("warning about user-space split_locks\n"); - break; - case sld_fatal: - pr_info("sending SIGBUS on user-space split_locks\n"); - break; } rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); @@ -1058,7 +1058,9 @@ static void __init split_lock_setup(void) return; } - sld_state = state; + /* Restore the MSR to its cached value. */ + wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); + setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); } @@ -1115,6 +1117,29 @@ bool handle_guest_split_lock(unsigned long ip) } EXPORT_SYMBOL_GPL(handle_guest_split_lock); +static void bus_lock_init(void) +{ + u64 val; + + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) || + (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) + return; + + /* + * Enable #DB for bus lock. All bus locks are handled in #DB except + * split locks are handled in #AC in the fatal case. + */ + rdmsrl(MSR_IA32_DEBUGCTLMSR, val); + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); +} + bool handle_user_split_lock(struct pt_regs *regs, long error_code) { if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) @@ -1123,6 +1148,21 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code) return true; } +void handle_bus_lock(struct pt_regs *regs) +{ + switch (sld_state) { + case sld_off: + break; + case sld_warn: + pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", + current->comm, current->pid, regs->ip); + break; + case sld_fatal: + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + break; + } +} + /* * This function is called only when switching between tasks with * different split-lock detection modes. It sets the MSR for the @@ -1163,7 +1203,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { {} }; -void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) +static void __init split_lock_setup(struct cpuinfo_x86 *c) { const struct x86_cpu_id *m; u64 ia32_core_caps; @@ -1190,5 +1230,40 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) } cpu_model_supports_sld = true; - split_lock_setup(); + __split_lock_setup(); +} + +static void sld_state_show(void) +{ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + return; + + switch (sld_state) { + case sld_off: + pr_info("disabled\n"); + break; + case sld_warn: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); + else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + pr_info("#DB: warning on user-space bus_locks\n"); + break; + case sld_fatal: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { + pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { + pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", + boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? + " from non-WB" : ""); + } + break; + } +} + +void __init sld_setup(struct cpuinfo_x86 *c) +{ + split_lock_setup(c); + sld_state_setup(); + sld_state_show(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 696ec85164e6..68fa31d334e3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1023,6 +1023,10 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, goto out_irq; } + /* #DB for bus lock can only be triggered from userspace. */ + if (dr6 & DR_BUS_LOCK) + handle_bus_lock(regs); + /* Add the virtual_dr6 bits for signals. */ dr6 |= current->thread.virtual_dr6; if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp) -- Gitee From 58276802a77ed31f164b4a97d2204a7d6ee2c4fe Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 22 Mar 2021 13:53:25 +0000 Subject: [PATCH 3/7] Documentation/admin-guide: Change doc for split_lock_detect parameter mainline inclusion from mainline-v5.12-rc4 commit ebca17707e38f2050b188d837bd4646b29a1b0c2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=ebca17707e38f2050b188d837bd4646b29a1b0c2 Intel-SIG: commit ebca177 Documentation/admin-guide: Change doc for split_lock_ detect parameter -------------------------------- Since #DB for bus lock detect changes the split_lock_detect parameter, update the documentation for the changes. Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210322135325.682257-4-fenghua.yu@intel.com (cherry picked from commit ebca17707e38f2050b188d837bd4646b29a1b0c2) Signed-off-by: Ethan Zhao --- .../admin-guide/kernel-parameters.txt | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2b04cf8fbab4..fbbe09ac2f86 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5275,27 +5275,37 @@ spia_peddr= split_lock_detect= - [X86] Enable split lock detection + [X86] Enable split lock detection or bus lock detection When enabled (and if hardware support is present), atomic instructions that access data across cache line - boundaries will result in an alignment check exception. + boundaries will result in an alignment check exception + for split lock detection or a debug exception for + bus lock detection. off - not enabled - warn - the kernel will emit rate limited warnings + warn - the kernel will emit rate-limited warnings about applications triggering the #AC - exception. This mode is the default on CPUs - that supports split lock detection. + exception or the #DB exception. This mode is + the default on CPUs that support split lock + detection or bus lock detection. Default + behavior is by #AC if both features are + enabled in hardware. fatal - the kernel will send SIGBUS to applications - that trigger the #AC exception. + that trigger the #AC exception or the #DB + exception. Default behavior is by #AC if + both features are enabled in hardware. If an #AC exception is hit in the kernel or in firmware (i.e. not while executing in user mode) the kernel will oops in either "warn" or "fatal" mode. + #DB exception for bus lock is triggered only when + CPL > 0. + srbds= [X86,INTEL] Control the Special Register Buffer Data Sampling (SRBDS) mitigation. -- Gitee From 9fbda1057252b584a3b24a507740aac38fe41c7d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 19 Apr 2021 21:49:55 +0000 Subject: [PATCH 4/7] Documentation/x86: Add buslock.rst mainline inclusion from mainline-v5.12-rc4 commit 1897907cca5aa22cdfcdb7fb8f0644a6add0877d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=1897907cca5aa22cdfcdb7fb8f0644a6add0877d Intel-SIG: commit 1897907 Documentation/x86: Add buslock.rst -------------------------------- Add buslock.rst to explain bus lock problem and how to detect and handle it. [ tglx: Included it into index.rst and added the missing include ... ] Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20210419214958.4035512-2-fenghua.yu@intel.com (cherry picked from commit 1897907cca5aa22cdfcdb7fb8f0644a6add0877d) Signed-off-by: Ethan Zhao --- Documentation/x86/buslock.rst | 104 ++++++++++++++++++++++++++++++++++ Documentation/x86/index.rst | 1 + 2 files changed, 105 insertions(+) create mode 100644 Documentation/x86/buslock.rst diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst new file mode 100644 index 000000000000..159ff6ba830e --- /dev/null +++ b/Documentation/x86/buslock.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: + +=============================== +Bus lock detection and handling +=============================== + +:Copyright: |copy| 2021 Intel Corporation +:Authors: - Fenghua Yu + - Tony Luck + +Problem +======= + +A split lock is any atomic operation whose operand crosses two cache lines. +Since the operand spans two cache lines and the operation must be atomic, +the system locks the bus while the CPU accesses the two cache lines. + +A bus lock is acquired through either split locked access to writeback (WB) +memory or any locked access to non-WB memory. This is typically thousands of +cycles slower than an atomic operation within a cache line. It also disrupts +performance on other cores and brings the whole system to its knees. + +Detection +========= + +Intel processors may support either or both of the following hardware +mechanisms to detect split locks and bus locks. + +#AC exception for split lock detection +-------------------------------------- + +Beginning with the Tremont Atom CPU split lock operations may raise an +Alignment Check (#AC) exception when a split lock operation is attemped. + +#DB exception for bus lock detection +------------------------------------ + +Some CPUs have the ability to notify the kernel by an #DB trap after a user +instruction acquires a bus lock and is executed. This allows the kernel to +terminate the application or to enforce throttling. + +Software handling +================= + +The kernel #AC and #DB handlers handle bus lock based on the kernel +parameter "split_lock_detect". Here is a summary of different options: + ++------------------+----------------------------+-----------------------+ +|split_lock_detect=|#AC for split lock |#DB for bus lock | ++------------------+----------------------------+-----------------------+ +|off |Do nothing |Do nothing | ++------------------+----------------------------+-----------------------+ +|warn |Kernel OOPs |Warn once per task and | +|(default) |Warn once per task and |and continues to run. | +| |disable future checking | | +| |When both features are | | +| |supported, warn in #AC | | ++------------------+----------------------------+-----------------------+ +|fatal |Kernel OOPs |Send SIGBUS to user. | +| |Send SIGBUS to user | | +| |When both features are | | +| |supported, fatal in #AC | | ++------------------+----------------------------+-----------------------+ + +Usages +====== + +Detecting and handling bus lock may find usages in various areas: + +It is critical for real time system designers who build consolidated real +time systems. These systems run hard real time code on some cores and run +"untrusted" user processes on other cores. The hard real time cannot afford +to have any bus lock from the untrusted processes to hurt real time +performance. To date the designers have been unable to deploy these +solutions as they have no way to prevent the "untrusted" user code from +generating split lock and bus lock to block the hard real time code to +access memory during bus locking. + +It's also useful for general computing to prevent guests or user +applications from slowing down the overall system by executing instructions +with bus lock. + + +Guidance +======== +off +--- + +Disable checking for split lock and bus lock. This option can be useful if +there are legacy applications that trigger these events at a low rate so +that mitigation is not needed. + +warn +---- + +A warning is emitted when a bus lock is detected which allows to identify +the offending application. This is the default behavior. + +fatal +----- + +In this case, the bus lock is not tolerated and the process is killed. diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index 647a570d4931..295a59cbaa69 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -29,6 +29,7 @@ x86-specific Documentation microcode resctrl_ui tsx_async_abort + buslock usb-legacy-support i386/index x86_64/index -- Gitee From 25f4c3c5f204a006fbcc003054c3cc08932b6e0c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 19 Apr 2021 21:49:57 +0000 Subject: [PATCH 5/7] Documentation/admin-guide: Add bus lock ratelimit mainline inclusion from mainline-v5.12-rc4 commit 9d839c280b64817345c2fa462c0027a9bd742361 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=9d839c280b64817345c2fa462c0027a9bd742361 Intel-SIG: commit 9d839c Documentation/admin-guide: Add bus lock ratelimit -------------------------------- Since bus lock rate limit changes the split_lock_detect parameter, update the documentation for the change. Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20210419214958.4035512-4-fenghua.yu@intel.com (cherry picked from commit 9d839c280b64817345c2fa462c0027a9bd742361) Signed-off-by: Ethan Zhao --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fbbe09ac2f86..4241caac8b83 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5298,6 +5298,14 @@ exception. Default behavior is by #AC if both features are enabled in hardware. + ratelimit:N - + Set system wide rate limit to N bus locks + per second for bus lock detection. + 0 < N <= 1000. + + N/A for split lock detection. + + If an #AC exception is hit in the kernel or in firmware (i.e. not while executing in user mode) the kernel will oops in either "warn" or "fatal" -- Gitee From afc7742cc862c618ea92366a49b9437e51ad387a Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 19 Apr 2021 21:49:56 +0000 Subject: [PATCH 6/7] x86/bus_lock: Set rate limit for bus lock mainline inclusion from mainline-v5.12-rc4 commit ef4ae6e4413159d2329a172c12e9274e2cb0a3a8 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=ef4ae6e4413159d2329a172c12e9274e2cb0a3a8 Intel-SIG: commit ef4ae6e x86/bus_lock: Set rate limit for bus lock_ -------------------------------- A bus lock can be thousands of cycles slower than atomic operation within one cache line. It also disrupts performance on other cores. Malicious users can generate multiple bus locks to degrade the whole system performance. The current mitigation is to kill the offending process, but for certain scenarios it's desired to identify and throttle the offending application. Add a system wide rate limit for bus locks. When the system detects bus locks at a rate higher than N/sec (where N can be set by the kernel boot argument in the range [1..1000]) any task triggering a bus lock will be forced to sleep for at least 20ms until the overall system rate of bus locks drops below the threshold. Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Link: https://lore.kernel.org/r/20210419214958.4035512-3-fenghua.yu@intel.com (cherry picked from commit ef4ae6e4413159d2329a172c12e9274e2cb0a3a8) Signed-off-by: Ethan Zhao --- arch/x86/kernel/cpu/intel.c | 42 +++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dfda4f42ec9b..1780db8192a0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ enum split_lock_detect_state { sld_off = 0, sld_warn, sld_fatal, + sld_ratelimit, }; /* @@ -996,13 +998,30 @@ static const struct { { "off", sld_off }, { "warn", sld_warn }, { "fatal", sld_fatal }, + { "ratelimit:", sld_ratelimit }, }; +static struct ratelimit_state bld_ratelimit; + static inline bool match_option(const char *arg, int arglen, const char *opt) { - int len = strlen(opt); + int len = strlen(opt), ratelimit; + + if (strncmp(arg, opt, len)) + return false; + + /* + * Min ratelimit is 1 bus lock/sec. + * Max ratelimit is 1000 bus locks/sec. + */ + if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && + ratelimit > 0 && ratelimit <= 1000) { + ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); + ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); + return true; + } - return len == arglen && !strncmp(arg, opt, len); + return len == arglen; } static bool split_lock_verify_msr(bool on) @@ -1081,6 +1100,15 @@ static void sld_update_msr(bool on) static void split_lock_init(void) { + /* + * #DB for bus lock handles ratelimit and #AC for split lock is + * disabled. + */ + if (sld_state == sld_ratelimit) { + split_lock_verify_msr(false); + return; + } + if (cpu_model_supports_sld) split_lock_verify_msr(sld_state != sld_off); } @@ -1153,6 +1181,12 @@ void handle_bus_lock(struct pt_regs *regs) switch (sld_state) { case sld_off: break; + case sld_ratelimit: + /* Enforce no more than bld_ratelimit bus locks/sec. */ + while (!__ratelimit(&bld_ratelimit)) + msleep(20); + /* Warn on the bus lock. */ + fallthrough; case sld_warn: pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", current->comm, current->pid, regs->ip); @@ -1258,6 +1292,10 @@ static void sld_state_show(void) " from non-WB" : ""); } break; + case sld_ratelimit: + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); + break; } } -- Gitee From a7a9a47d38a5e6d0e32e9167e51986b645502532 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 10 Mar 2022 12:48:53 -0800 Subject: [PATCH 7/7] x86/split_lock: Make life miserable for split lockers mainline inclusion from mainline-v5.18-rc4 commit b041b525dab95352fbd666b14dc73ab898df465f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5G10C CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ commit/?id=b041b525dab95352fbd666b14dc73ab898df465f Intel-SIG: commit b041b52 x86/split_lock: Make life miserable for split lockers -------------------------------- In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas said: Its's simply wishful thinking that stuff gets fixed because of a WARN_ONCE(). This has never worked. The only thing which works is to make stuff fail hard or slow it down in a way which makes it annoying enough to users to complain. He was talking about WBINVD. But it made me think about how we use the split lock detection feature in Linux. Existing code has three options for applications: 1) Don't enable split lock detection (allow arbitrary split locks) 2) Warn once when a process uses split lock, but let the process keep running with split lock detection disabled 3) Kill process that use split locks Option 2 falls into the "wishful thinking" territory that Thomas warns does nothing. But option 3 might not be viable in a situation with legacy applications that need to run. Hence make option 2 much stricter to "slow it down in a way which makes it annoying". Primary reason for this change is to provide better quality of service to the rest of the applications running on the system. Internal testing shows that even with many processes splitting locks, performance for the rest of the system is much more responsive. The new "warn" mode operates like this. When an application tries to execute a bus lock the #AC handler. 1) Delays (interruptibly) 10 ms before moving to next step. 2) Blocks (interruptibly) until it can get the semaphore If interrupted, just return. Assume the signal will either kill the task, or direct execution away from the instruction that is trying to get the bus lock. 3) Disables split lock detection for the current core 4) Schedules a work queue to re-enable split lock detect in 2 jiffies 5) Returns The work queue that re-enables split lock detection also releases the semaphore. There is a corner case where a CPU may be taken offline while split lock detection is disabled. A CPU hotplug handler handles this case. Old behaviour was to only print the split lock warning on the first occurrence of a split lock from a task. Preserve that by adding a flag to the task structure that suppresses subsequent split lock messages from that task. Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220310204854.31752-2-tony.luck@intel.com (cherry picked from commit b041b525dab95352fbd666b14dc73ab898df465f) Signed-off-by: Ethan Zhao --- arch/x86/kernel/cpu/intel.c | 63 +++++++++++++++++++++++++++++++------ include/linux/sched.h | 3 ++ kernel/fork.c | 5 +++ 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1780db8192a0..87f07093df53 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -7,10 +7,13 @@ #include #include #include +#include #include #include #include +#include #include +#include #include #include @@ -1003,6 +1006,8 @@ static const struct { static struct ratelimit_state bld_ratelimit; +static DEFINE_SEMAPHORE(buslock_sem); + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt), ratelimit; @@ -1113,18 +1118,52 @@ static void split_lock_init(void) split_lock_verify_msr(sld_state != sld_off); } +static void __split_lock_reenable(struct work_struct *work) +{ + sld_update_msr(true); + up(&buslock_sem); +} + +/* + * If a CPU goes offline with pending delayed work to re-enable split lock + * detection then the delayed work will be executed on some other CPU. That + * handles releasing the buslock_sem, but because it executes on a + * different CPU probably won't re-enable split lock detection. This is a + * problem on HT systems since the sibling CPU on the same core may then be + * left running with split lock detection disabled. + * + * Unconditionally re-enable detection here. + */ +static int splitlock_cpu_offline(unsigned int cpu) +{ + sld_update_msr(true); + + return 0; +} + +static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable); + static void split_lock_warn(unsigned long ip) { - pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", - current->comm, current->pid, ip); + int cpu; - /* - * Disable the split lock detection for this task so it can make - * progress and set TIF_SLD so the detection is re-enabled via - * switch_to_sld() when the task is scheduled out. - */ + if (!current->reported_split_lock) + pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", + current->comm, current->pid, ip); + current->reported_split_lock = 1; + + /* misery factor #1, sleep 10ms before trying to execute split lock */ + if (msleep_interruptible(10) > 0) + return; + /* Misery factor #2, only allow one buslocked disabled core at a time */ + if (down_interruptible(&buslock_sem) == -EINTR) + return; + cpu = get_cpu(); + schedule_delayed_work_on(cpu, &split_lock_reenable, 2); + + /* Disable split lock detection on this CPU to make progress */ sld_update_msr(false); - set_tsk_thread_flag(current, TIF_SLD); + put_cpu(); } bool handle_guest_split_lock(unsigned long ip) @@ -1278,10 +1317,14 @@ static void sld_state_show(void) pr_info("disabled\n"); break; case sld_warn: - if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); - else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "x86/splitlock", NULL, splitlock_cpu_offline) < 0) + pr_warn("No splitlock CPU offline handler\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { pr_info("#DB: warning on user-space bus_locks\n"); + } break; case sld_fatal: if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 47f462040f4d..764b2a927ef9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,9 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_CPU_SUP_INTEL + unsigned reported_split_lock:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/fork.c b/kernel/fork.c index 0fb86b65ae60..7a2395ade1d4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -946,6 +946,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif + +#ifdef CONFIG_CPU_SUP_INTEL + tsk->reported_split_lock = 0; +#endif + return tsk; free_stack: -- Gitee