diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 8de11532ca763069db8794e7fde81b065de54190..d629a2348498095b5bfbb0aee3da4bbd04873cdc 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -19,3 +19,4 @@ are configurable at compile, boot or run time. processor_mmio_stale_data.rst cross-thread-rsb.rst gather_data_sampling.rst + srso diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst new file mode 100644 index 0000000000000000000000000000000000000000..f79cb11b080f67733c4a4d08699a991f95aeb015 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/srso.rst @@ -0,0 +1,133 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Speculative Return Stack Overflow (SRSO) +======================================== + +This is a mitigation for the speculative return stack overflow (SRSO) +vulnerability found on AMD processors. The mechanism is by now the well +known scenario of poisoning CPU functional units - the Branch Target +Buffer (BTB) and Return Address Predictor (RAP) in this case - and then +tricking the elevated privilege domain (the kernel) into leaking +sensitive data. + +AMD CPUs predict RET instructions using a Return Address Predictor (aka +Return Address Stack/Return Stack Buffer). In some cases, a non-architectural +CALL instruction (i.e., an instruction that is predicted to be a CALL but is +not actually a CALL) can create an entry in the RAP which may be used +to predict the target of a subsequent RET instruction. + +The specific circumstances that lead to this vary by microarchitecture, +but the concern is that an attacker can mis-train the CPU BTB to predict +non-architectural CALL instructions in kernel space and use this to +control the speculative target of a subsequent kernel RET, potentially +leading to information disclosure via a speculative side-channel. + +The issue is tracked under CVE-2023-20569. + +Affected processors +------------------- + +AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older +processors have not been investigated. + +System information and options +------------------------------ + +First of all, it is required that the latest microcode be loaded for +mitigations to be effective. + +The sysfs file showing SRSO mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow + +The possible values in this file are: + + - 'Not affected' The processor is not vulnerable + + - 'Vulnerable: no microcode' The processor is vulnerable; no + microcode extending IBPB functionality + to address the vulnerability has been + applied. + + - 'Mitigation: microcode' Extended IBPB functionality microcode + patch has been applied. It does not + protect User->Kernel and Guest->Host + transitions but it does + address User->User and VM->VM attack + vectors. + + (spec_rstack_overflow=microcode) + + - 'Mitigation: safe RET' Software-only mitigation. It complements + the extended IBPB microcode patch + functionality by protecting User->Kernel + and Guest->Host transitions. + + Selected by default or by + spec_rstack_overflow=safe-ret + + - 'Mitigation: IBPB' Similar protection to "safe RET" above + but employs an IBPB barrier on privilege + domain crossings (User->Kernel, + Guest->Host). + + (spec_rstack_overflow=ibpb) + + - 'Mitigation: IBPB on VMEXIT' Mitigation addressing the cloud provider + scenario - the Guest->Host transitions + only. 
+ + (spec_rstack_overflow=ibpb-vmexit) +In order to exploit the vulnerability, an attacker needs to: + + - gain local access on the machine + + - break kASLR + + - find gadgets in the running kernel in order to use them in the exploit + + - potentially create and pin an additional workload on the sibling + thread, depending on the microarchitecture (not necessary on fam 0x19) + + - run the exploit + +Considering the performance implications of each mitigation type, the +default one is 'Mitigation: safe RET' which should take care of most +attack vectors, including the local User->Kernel one. + +As always, the user is advised to keep her/his system up-to-date by +applying software updates regularly. + +The default setting will be reevaluated when needed and especially when +new attack vectors appear. + +As one can surmise, 'Mitigation: safe RET' does come at the cost of some +performance depending on the workload. If one trusts her/his userspace +and does not want to suffer the performance impact, one can always +disable the mitigation with spec_rstack_overflow=off. + +Similarly, 'Mitigation: IBPB' is another full mitigation type employing +an indirect branch prediction barrier after having applied the required +microcode patch for one's system. This mitigation also comes at +a performance cost. + +Mitigation: safe RET +-------------------- + +The mitigation works by ensuring all RET instructions speculate to +a controlled location, similar to how speculation is controlled in the +retpoline sequence. To accomplish this, the __x86_return_thunk forces +the CPU to mispredict every function return using a 'safe return' +sequence. + +To ensure the safety of this mitigation, the kernel must ensure that the +safe return sequence is itself free from attacker interference. In Zen3 +and Zen4, this is accomplished by creating a BTB alias between the +untraining function srso_alias_untrain_ret() and the safe return +function srso_alias_safe_ret() which results in evicting a potentially +poisoned BTB entry and using that safe one for all function returns. + +In older Zen1 and Zen2, this is accomplished using a reinterpretation +technique similar to the Retbleed one: srso_untrain_ret() and +srso_safe_ret(). diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 93bab61aaf3834247c5b86ec57421c052711068d..d8582d7e59848d70337b03dc3e9794e847eb0bf0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5325,6 +5325,17 @@ Not specifying this option is equivalent to spectre_v2_user=auto. + spec_rstack_overflow= + [X86] Control RAS overflow mitigation on AMD Zen CPUs + + off - Disable mitigation + microcode - Enable microcode mitigation only + safe-ret - Enable sw-only safe RET mitigation (default) + ibpb - Enable mitigation by issuing IBPB on + kernel entry + ibpb-vmexit - Issue IBPB only on VMEXIT + (cloud-specific mitigation) + spec_store_bypass_disable= [HW] Control Speculative Store Bypass (SSB) Disable mitigation (Speculative Store Bypass vulnerability) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0baa452efb5e8fd460eb901d87bbb426bc0d00bc..12a6ff5814c6c8f2eaef4efec485d5d5ddbee1c1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2537,6 +2537,13 @@ config CPU_IBRS_ENTRY This mitigates both spectre_v2 and retbleed at great cost to performance. 
+config CPU_SRSO + bool "Mitigate speculative RAS overflow on AMD" + depends on CPU_SUP_AMD && X86_64 && RETHUNK + default y + help + Enable the SRSO mitigation needed on AMD Zen1-4 machines. + config SLS bool "Mitigate Straight-Line-Speculation" depends on CC_HAS_SLS && X86_64 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f4cbc01c0bc46e0ea71e2d4065e8e77ed6ab60fd..39cbea8f06dad24d7c99734488fd82f7aeccb32b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -47,10 +47,14 @@ extern const char * const x86_power_flags[32]; * In order to save room, we index into this array by doing * X86_BUG_ - NCAPINTS*32. */ -extern const char * const x86_bug_flags[NBUGINTS*32]; +extern const char * const x86_bug_flags[(NBUGINTS+NEXTBUGINTS)*32]; + +#define IS_EXT_BUGBIT(bit) ((bit>>5) >= (NCAPINTS+NBUGINTS)) #define test_cpu_cap(c, bit) \ - arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) + (IS_EXT_BUGBIT(bit) ? arch_test_bit((bit) - ((NCAPINTS+NBUGINTS)*32), \ + (unsigned long *)((c)->x86_ext_capability)) : \ + arch_test_bit(bit, (unsigned long *)((c)->x86_capability))) /* * There are 32 bits/features in each mask word. The high bits @@ -137,14 +141,25 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) -#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) +#define set_cpu_cap(c, bit) do { \ + if (IS_EXT_BUGBIT(bit)) \ + set_bit(bit - ((NCAPINTS+NBUGINTS))*32, \ + (unsigned long *)((c)->x86_ext_capability)); \ + else \ + set_bit(bit, (unsigned long *)((c)->x86_capability)); \ +} while (0) + extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - set_bit(bit, (unsigned long *)cpu_caps_set); \ + if (IS_EXT_BUGBIT(bit)) \ + set_bit(bit - ((NCAPINTS+NBUGINTS))*32, \ + (unsigned long *)&cpu_caps_set[NCAPINTS+NBUGINTS]); \ + else \ + set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b324dacac04f27914a30c252491c516d571e26bd..a0621cae5ac258b1141e64e3a2b21103daec2f33 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,9 +13,9 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ - +#define NEXTBUGINTS 1 /* N 32-bit extended bug flags */ /* * Note: If the comment begins with a quoted string, that string is used * in /proc/cpuinfo instead of the macro name. 
If the string is "", @@ -323,6 +323,10 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -400,6 +404,11 @@ #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +/* AMD-defined SRSO vulnerability features, CPUID level 0x80000021 (EAX), word 20 */ +#define X86_FEATURE_SBPB (17*32+24) +#define X86_FEATURE_IBPB_BRTYPE (17*32+25) +#define X86_FEATURE_SRSO_NO (17*32+26) + /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ #define X86_FEATURE_SME (17*32+ 27) /* AMD Secure Memory Encryption */ #define X86_FEATURE_SEV (17*32+ 28) /* AMD Secure Encrypted Virtualization */ @@ -475,4 +484,6 @@ #define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ #define X86_BUG_DIV0 X86_BUG(31) /* AMD DIV0 speculation bug */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec39d6d9967bd70adbc90ceb19bb12ac80b7d3f3..62e8681d0e3f754c42c8eab076e0167977a76d38 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -56,6 +56,7 @@ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 020f5ad84ff7adc0dbb86a000617dec5c14f120e..181cda568eff72d76eff2171100a323548141adb 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -112,7 +112,7 @@ * eventually turn into it's own annotation. */ .macro ANNOTATE_UNRET_END -#ifdef CONFIG_DEBUG_ENTRY +#if (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif @@ -156,9 +156,9 @@ .endm #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#define CALL_UNTRAIN_RET "call entry_untrain_ret" #else -#define CALL_ZEN_UNTRAIN_RET "" +#define CALL_UNTRAIN_RET "" #endif /* @@ -166,17 +166,18 @@ * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * - * While zen_untrain_ret() doesn't clobber anything but requires stack, + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, * entry_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. 
*/ .macro UNTRAIN_RET -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CPU_SRSO) ANNOTATE_UNRET_END ALTERNATIVE_2 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -189,8 +190,21 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" +#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); -extern void zen_untrain_ret(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + +extern void retbleed_untrain_ret(void); +extern void srso_untrain_ret(void); +extern void srso_alias_untrain_ret(void); + +extern void entry_untrain_ret(void); extern void entry_ibpb(void); #ifdef CONFIG_RETPOLINE @@ -307,11 +321,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +extern u64 x86_pred_cmd; + static inline void indirect_branch_prediction_barrier(void) { - u64 val = PRED_CMD_IBPB; - - alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); } /* The Intel SPEC CTRL MSR base value cache */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 479a5cc39879aa22ebd3410ab3760611fd555a01..33cf8d30e19e4a078336f595ff7886ddfeacdc33 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -141,6 +141,9 @@ struct cpuinfo_x86 { /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; +#ifndef __GENKSYMS__ + __u32 x86_ext_capability[NEXTBUGINTS]; +#endif } __randomize_layout; struct extra_cpuinfo_x86 { @@ -182,7 +185,7 @@ extern struct cpuinfo_x86 new_cpu_data; extern struct extra_cpuinfo_x86 extra_boot_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; -extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS + NEXTBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); @@ -835,10 +838,12 @@ extern u16 get_llc_id(unsigned int cpu); extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); extern void amd_clear_divider(void); +extern bool cpu_has_ibpb_brtype_microcode(void); #else static inline u16 amd_get_nb_id(int cpu) { return 0; } static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline void amd_clear_divider(void) { } +static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } #endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 82e9fd11b36465054800dd72df5374fe08247dbb..293c8d4a43285ba0a5f72d0e351f96265f0ac0aa 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -441,7 +441,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->instrlen > sizeof(insn_buff)); - BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32); + BUG_ON(feature >= (NCAPINTS + NBUGINTS + NEXTBUGINTS) * 32); /* * Patch if either: diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 
6df68f2afd20057c8c807be05840c3e63440ccc0..53cd34ac89c458a7b7f8cabf8bb0ac1cfa217f0f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1283,6 +1283,25 @@ void set_dr_addr_mask(unsigned long mask, int dr) } } +bool cpu_has_ibpb_brtype_microcode(void) +{ + switch (boot_cpu_data.x86) { + /* Zen1/2 IBPB flushes branch type predictions too. */ + case 0x17: + return boot_cpu_has(X86_FEATURE_AMD_IBPB); + case 0x19: + /* Poke the MSR bit on Zen3/4 to check its presence. */ + if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { + setup_force_cpu_cap(X86_FEATURE_SBPB); + return true; + } else { + return false; + } + default: + return false; + } +} + static void zenbleed_check_cpu(void *unused) { struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b142595a01d3959099da36eb867fed47cbf472b8..8bbc88149f0cc1efbf5fb3a02a628a65984bf621 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init srso_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -58,8 +59,13 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); +u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; +EXPORT_SYMBOL_GPL(x86_pred_cmd); + static DEFINE_MUTEX(spec_ctrl_mutex); +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). @@ -152,6 +158,12 @@ void __init check_bugs(void) l1tf_select_mitigation(); md_clear_select_mitigation(); srbds_select_mitigation(); + + /* + * srso_select_mitigation() depends and must run after + * retbleed_select_mitigation(). 
+ */ + srso_select_mitigation(); gds_select_mitigation(); arch_smt_update(); @@ -1005,6 +1017,9 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); + if (IS_ENABLED(CONFIG_RETHUNK)) + x86_return_thunk = retbleed_return_thunk; + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); @@ -2267,6 +2282,170 @@ static int __init l1tf_cmdline(char *str) } early_param("l1tf", l1tf_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt + +enum srso_mitigation { + SRSO_MITIGATION_NONE, + SRSO_MITIGATION_MICROCODE, + SRSO_MITIGATION_SAFE_RET, + SRSO_MITIGATION_IBPB, + SRSO_MITIGATION_IBPB_ON_VMEXIT, +}; + +enum srso_mitigation_cmd { + SRSO_CMD_OFF, + SRSO_CMD_MICROCODE, + SRSO_CMD_SAFE_RET, + SRSO_CMD_IBPB, + SRSO_CMD_IBPB_ON_VMEXIT, +}; + +static const char * const srso_strings[] = { + [SRSO_MITIGATION_NONE] = "Vulnerable", + [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode", + [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET", + [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", + [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only" +}; + +static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; +static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET; + +static int __init srso_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + srso_cmd = SRSO_CMD_OFF; + else if (!strcmp(str, "microcode")) + srso_cmd = SRSO_CMD_MICROCODE; + else if (!strcmp(str, "safe-ret")) + srso_cmd = SRSO_CMD_SAFE_RET; + else if (!strcmp(str, "ibpb")) + srso_cmd = SRSO_CMD_IBPB; + else if (!strcmp(str, "ibpb-vmexit")) + srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT; + else + pr_err("Ignoring unknown SRSO option (%s).", str); + + return 0; +} +early_param("spec_rstack_overflow", srso_parse_cmdline); + +#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options." + +static void __init srso_select_mitigation(void) +{ + bool has_microcode; + + if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) + goto pred_cmd; + + /* + * The first check is for the kernel running as a guest in order + * for guests to verify whether IBPB is a viable mitigation. + */ + has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode(); + if (!has_microcode) { + pr_warn("IBPB-extending microcode not applied!\n"); + pr_warn(SRSO_NOTICE); + } else { + /* + * Enable the synthetic (even if in a real CPUID leaf) + * flags for guests. + */ + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + + /* + * Zen1/2 with SMT off aren't vulnerable after the right + * IBPB microcode has been applied. + */ + if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { + setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + return; + } + } + + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { + if (has_microcode) { + pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n"); + srso_mitigation = SRSO_MITIGATION_IBPB; + goto pred_cmd; + } + } + + switch (srso_cmd) { + case SRSO_CMD_OFF: + return; + + case SRSO_CMD_MICROCODE: + if (has_microcode) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; + pr_warn(SRSO_NOTICE); + } + break; + + case SRSO_CMD_SAFE_RET: + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + /* + * Enable the return thunk for generated code + * like ftrace, static_call, etc. 
+ */ + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86 == 0x19) { + setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); + x86_return_thunk = srso_alias_return_thunk; + } else { + setup_force_cpu_cap(X86_FEATURE_SRSO); + x86_return_thunk = srso_return_thunk; + } + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else { + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + goto pred_cmd; + } + break; + + case SRSO_CMD_IBPB: + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (has_microcode) { + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + srso_mitigation = SRSO_MITIGATION_IBPB; + } + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto pred_cmd; + } + break; + + case SRSO_CMD_IBPB_ON_VMEXIT: + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + } + } else { + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + goto pred_cmd; + } + break; + + default: + break; + } + + pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode")); + +pred_cmd: + if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) && + boot_cpu_has(X86_FEATURE_SBPB)) + x86_pred_cmd = PRED_CMD_SBPB; +} + #undef pr_fmt #define pr_fmt(fmt) fmt @@ -2470,6 +2649,16 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t srso_show_state(char *buf) +{ + if (boot_cpu_has(X86_FEATURE_SRSO_NO)) + return sysfs_emit(buf, "Mitigation: SMT disabled\n"); + + return sysfs_emit(buf, "%s%s\n", + srso_strings[srso_mitigation], + (cpu_has_ibpb_brtype_microcode() ? 
"" : ", no microcode")); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2522,6 +2711,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_GDS: return gds_show_state(buf); + case X86_BUG_SRSO: + return srso_show_state(buf); + default: break; } @@ -2591,4 +2783,9 @@ ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_GDS); } + +ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index abba125765e4d75b1a4dc215fddc2cf264955ecc..9608633f49ff99f27da3916e7b617136e806ac22 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -592,7 +592,7 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) /* Aligned to unsigned long to avoid split lock in atomic bitmap ops */ __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); -__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); +__u32 cpu_caps_set[NCAPINTS + NBUGINTS + NEXTBUGINTS] __aligned(sizeof(unsigned long)); void load_percpu_segment(int cpu) { @@ -855,6 +855,11 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } + + for (i = 0; i < NEXTBUGINTS; i++) { + c->x86_ext_capability[i] |= cpu_caps_set[NCAPINTS+NBUGINTS + i]; + } + } static void init_speculation_control(struct cpuinfo_x86 *c) @@ -1128,6 +1133,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SMT_RSB BIT(4) /* CPU is affected by GDS */ #define GDS BIT(5) +/* CPU is affected by SRSO */ +#define SRSO BIT(6) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1161,8 +1168,9 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED | SMT_RSB), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), + VULNBL_AMD(0x19, SRSO), {} }; @@ -1293,6 +1301,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); + if (!cpu_has(c, X86_FEATURE_SRSO_NO)) { + if (cpu_matches(cpu_vuln_blacklist, SRSO)) + setup_force_cpu_bug(X86_BUG_SRSO); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index b030882d2f6f390d9202b4da93fb904ca31d2e4c..a07865f0e62233423ee9bad022a6c9537469a595 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -60,7 +60,7 @@ trap 'rm "$OUT"' EXIT dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" "" $2 echo "" - dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32" $2 + dump_array "x86_bug_flags" "(NBUGINTS+NEXTBUGINTS)*32" "X86_BUG_" "NCAPINTS*32" $2 echo "" echo "#ifdef CONFIG_X86_VMX_FEATURE_NAMES" diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 7810c75abdd4e5bd3d6c44b08a2e40622529be08..daa078c9e2089effcc52801e7d081d37579e42bf 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -126,7 +126,7 @@ static int show_cpuinfo(struct seq_file 
*m, void *v) #endif seq_puts(m, "\nbugs\t\t:"); - for (i = 0; i < 32*NBUGINTS; i++) { + for (i = 0; i < 32*(NBUGINTS+NEXTBUGINTS); i++) { unsigned int bug_bit = 32*NCAPINTS + i; if (cpu_has_bug(c, bug_bit) && x86_bug_flags[i]) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index b8c775d3c7d3eb63e81637e6e4672ff03d295a15..07f147435919c33b6068da5d853d67ce90ed0abd 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -49,6 +49,9 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 }, { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 }, { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, + { X86_FEATURE_SBPB, CPUID_EAX, 27, 0x80000021, 0 }, + { X86_FEATURE_IBPB_BRTYPE, CPUID_EAX, 28, 0x80000021, 0 }, + { X86_FEATURE_SRSO_NO, CPUID_EAX, 29, 0x80000021, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a21cd2381fa8fbd5a2899aad80f564544c373b30..3f3e31a457d4dba2549422d75f1f9491f6d17218 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -133,7 +133,20 @@ SECTIONS LOCK_TEXT KPROBES_TEXT ALIGN_ENTRY_TEXT_BEGIN +#ifdef CONFIG_CPU_SRSO + *(.text.__x86.rethunk_untrain) +#endif + ENTRY_TEXT + +#ifdef CONFIG_CPU_SRSO + /* + * See the comment above srso_alias_untrain_ret()'s + * definition. + */ + . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); + *(.text.__x86.rethunk_safe) +#endif ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT STATIC_CALL_TEXT @@ -142,13 +155,15 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.*) + *(.text.__x86.indirect_thunk) + *(.text.__x86.return_thunk) __indirect_thunk_end = .; #endif } :text =0xcccc /* End of text section, which should occupy whole number of pages */ _etext = .; + . = ALIGN(PAGE_SIZE); X86_ALIGN_RODATA_BEGIN @@ -502,6 +517,27 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif +#ifdef CONFIG_RETHUNK +. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); +. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); +#endif + +#ifdef CONFIG_CPU_SRSO +/* + * GNU ld cannot do XOR until 2.41. + * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1 + * + * LLVM lld cannot do XOR until lld-17. + * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb + * + * Instead do: (A | B) - (A & B) in order to compute the XOR + * of the two function addresses: + */ +. 
= ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) - + (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)), + "SRSO function pair won't alias"); +#endif + #endif /* CONFIG_X86_32 */ #ifdef CONFIG_KEXEC_CORE diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index aab185562803f664b1932ea78017e768f4bfb87c..a2590cde1b0a996bca6cb8a62a4cb5e62a3c97e9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -559,6 +559,13 @@ void kvm_set_cpu_caps(void) !boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); + if (cpu_feature_enabled(X86_FEATURE_SBPB)) + kvm_cpu_cap_set(X86_FEATURE_SBPB); + if (cpu_feature_enabled(X86_FEATURE_IBPB_BRTYPE)) + kvm_cpu_cap_set(X86_FEATURE_IBPB_BRTYPE); + if (cpu_feature_enabled(X86_FEATURE_SRSO_NO)) + kvm_cpu_cap_set(X86_FEATURE_SRSO_NO); + /* * Hide all SVM features by default, SVM will set the cap bits for * features it emulates and/or exposes for L1. diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 63d96ec61cd3927936a133c5871ac07e15a440ed..b10e830283e39756d17dff2eaa21ead9681b2973 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -14,6 +14,7 @@ */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_8000_0021_EAX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -25,6 +26,11 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* AMD-defined SRSO vulnerability features, CPUID level 0x80000021 (EAX), word 20 */ +#define KVM_X86_FEATURE_SBPB KVM_X86_FEATURE(CPUID_8000_0021_EAX, 27) +#define KVM_X86_FEATURE_IBPB_BRTYPE KVM_X86_FEATURE(CPUID_8000_0021_EAX, 28) +#define KVM_X86_FEATURE_SRSO_NO KVM_X86_FEATURE(CPUID_8000_0021_EAX, 29) + extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; void kvm_set_cpu_caps(void); @@ -84,6 +90,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, + [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, }; /* @@ -115,6 +122,13 @@ static __always_inline u32 __feature_translate(int x86_feature) else if (x86_feature == X86_FEATURE_SGX2) return KVM_X86_FEATURE_SGX2; + if (x86_feature == X86_FEATURE_SBPB) + return KVM_X86_FEATURE_SBPB; + else if (x86_feature == X86_FEATURE_IBPB_BRTYPE) + return KVM_X86_FEATURE_IBPB_BRTYPE; + else if (x86_feature == X86_FEATURE_SRSO_NO) + return KVM_X86_FEATURE_SRSO_NO; + return x86_feature; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3ad12d07160ac5d19d7970e5591798be76133f6e..a09cd81026111d6afd0b050fc16cf7ae09cf5ce5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1399,7 +1399,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; - indirect_branch_prediction_barrier(); + + if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT)) + indirect_branch_prediction_barrier(); } avic_vcpu_load(vcpu, cpu); } diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index c18d812d00cd715110ca609f9bf504298be503cb..a8859c1732580063da73c41c294d538f7df0fd3d 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -137,6 +137,9 @@ SYM_FUNC_START(__svm_vcpu_run) */ UNTRAIN_RET + /* SRSO */ + ALTERNATIVE "", "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT + /* * Clear all general purpose registers except RSP and 
RAX to prevent * speculative use of the guest's values, even those that are reloaded diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 1221bb099afb4d81f4392c585623df7182cd2de5..8d82da010784846cff563f6952f1ede6f3f66c9a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,6 +9,7 @@ #include #include #include +#include .section .text.__x86.indirect_thunk @@ -73,36 +74,87 @@ SYM_CODE_END(__x86_indirect_thunk_array) */ #ifdef CONFIG_RETHUNK +/* + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at + * special addresses: + * + * - srso_alias_untrain_ret() is 2M aligned + * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14 + * and 20 in its virtual address are set (while those bits in the + * srso_alias_untrain_ret() function are cleared). + * + * This guarantees that those two addresses will alias in the branch + * target buffer of Zen3/4 generations, leading to any potential + * poisoned entries at that BTB slot to get evicted. + * + * As a result, srso_alias_safe_ret() becomes a safe return. + */ +#ifdef CONFIG_CPU_SRSO + .section .text.__x86.rethunk_untrain + +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + UNWIND_HINT_FUNC + ASM_NOP2 + lfence + jmp srso_alias_return_thunk +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) + + .section .text.__x86.rethunk_safe +#else +/* dummy definition for alternatives */ +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +#endif + +SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + lea 8(%_ASM_SP), %_ASM_SP + UNWIND_HINT_FUNC + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_alias_safe_ret) + .section .text.__x86.return_thunk +SYM_CODE_START(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_alias_safe_ret + ud2 +SYM_CODE_END(srso_alias_return_thunk) + /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: - * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for * alignment within the BTB. - * 2) The instruction at zen_untrain_ret must contain, and not + * 2) The instruction at retbleed_untrain_ret must contain, and not * end with, the 0xc3 byte of the RET. * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread * from re-poisioning the BTB prediction. */ .align 64 - .skip 63, 0xcc -SYM_FUNC_START_NOALIGN(zen_untrain_ret); + .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc +SYM_FUNC_START_NOALIGN(retbleed_untrain_ret); /* - * As executed from zen_untrain_ret, this is: + * As executed from retbleed_untrain_ret, this is: * * TEST $0xcc, %bl * LFENCE - * JMP __x86_return_thunk + * JMP retbleed_return_thunk * * Executing the TEST instruction has a side effect of evicting any BTB * prediction (potentially attacker controlled) attached to the RET, as - * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + * retbleed_return_thunk + 1 isn't an instruction boundary at the moment. */ .byte 0xf6 /* - * As executed from __x86_return_thunk, this is a plain RET. + * As executed from retbleed_return_thunk, this is a plain RET. * * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. 
* @@ -114,13 +166,13 @@ SYM_FUNC_START_NOALIGN(zen_untrain_ret); * With SMT enabled and STIBP active, a sibling thread cannot poison * RET's prediction to a type of its choice, but can evict the * prediction due to competitive sharing. If the prediction is - * evicted, __x86_return_thunk will suffer Straight Line Speculation + * evicted, retbleed_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) +SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(__x86_return_thunk) +SYM_CODE_END(retbleed_return_thunk) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -131,11 +183,66 @@ SYM_CODE_END(__x86_return_thunk) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp __x86_return_thunk + jmp retbleed_return_thunk int3 -SYM_FUNC_END(zen_untrain_ret) -__EXPORT_THUNK(zen_untrain_ret) +SYM_FUNC_END(retbleed_untrain_ret) +__EXPORT_THUNK(retbleed_untrain_ret) +/* + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() + * above. On kernel entry, srso_untrain_ret() is executed which is a + * + * movabs $0xccccc30824648d48,%rax + * + * and when the return thunk executes the inner label srso_safe_ret() + * later, it is a stack manipulation and a RET which is mispredicted and + * thus a "safe" one to use. + */ + .align 64 + .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .byte 0x48, 0xb8 + +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. 
+ */ +SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) + lea 8(%_ASM_SP), %_ASM_SP + ret + int3 + int3 + /* end of movabs */ + lfence + call srso_safe_ret + ud2 +SYM_CODE_END(srso_safe_ret) +SYM_FUNC_END(srso_untrain_ret) +__EXPORT_THUNK(srso_untrain_ret) + +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + +SYM_FUNC_START(entry_untrain_ret) + ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ + "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ + "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS +SYM_FUNC_END(entry_untrain_ret) +__EXPORT_THUNK(entry_untrain_ret) + +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) #endif /* CONFIG_RETHUNK */ diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 81743cd47dc80ef0f7c2388f329ccf58a22e68ee..7968e169617c70964a617b124623165677ec56c6 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -584,6 +584,12 @@ ssize_t __weak cpu_show_gds(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_spec_rstack_overflow(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -596,6 +602,7 @@ static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); +static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -610,6 +617,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_mmio_stale_data.attr, &dev_attr_retbleed.attr, &dev_attr_gather_data_sampling.attr, + &dev_attr_spec_rstack_overflow.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 224a3acc2b6624b3ad7ac46b42e85c4d3b6ef3a5..e1ea2680b8e449c3daa23432067530d186d62e0a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -72,6 +72,8 @@ extern ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 662f19374bd982b6e732e79ba62c427427920252..d17fa7f4001d79214a98bb2f18652fec0902e72e 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -71,6 +71,23 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but otherwise needs objtool/ORC coverage when frame pointers are disabled. 
+ */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -117,6 +134,13 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -126,10 +150,14 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm +.macro ANNOTATE_NOENDBR +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index ec53f52a06a58d9404239eebc1c74e3215b16fb4..7b66d6c2448ee4288a4245c73981c6dfe3b8380e 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ #define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. 
*/ @@ -236,7 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -299,6 +298,7 @@ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ @@ -343,6 +343,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ @@ -370,6 +371,13 @@ #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (17*32+27) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (17*32+28) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (17*32+29) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (17*32+30) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (17*32+31) /* "" AMD hardware-enforced cache coherency */ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index b2ab83d8bdcd46116f3c411f16fd071597a1e795..2bd1aa8c68f264fe469fa8a7f6d59dac46043dcc 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -516,6 +516,7 @@ #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 662f19374bd982b6e732e79ba62c427427920252..d17fa7f4001d79214a98bb2f18652fec0902e72e 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -71,6 +71,23 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but 
otherwise needs objtool/ORC coverage when frame pointers are disabled. + */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -117,6 +134,13 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -126,10 +150,14 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm +.macro ANNOTATE_NOENDBR +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 580ce18575857349f42624403cb13de666a9ed88..75840291b3934f919f013e08626b140d5ff79473 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int *base); bool arch_is_retpoline(struct symbol *sym); bool arch_is_rethunk(struct symbol *sym); +bool arch_is_embedded_insn(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 828d902f482b63efb1dce9d81743509e2a72de91..7e464a95ba554ee0b1344d8ea2b30c3550e34a28 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -663,3 +663,9 @@ bool arch_is_rethunk(struct symbol *sym) { return !strcmp(sym->name, "__x86_return_thunk"); } + +bool arch_is_embedded_insn(struct symbol *sym) +{ + return !strcmp(sym->name, "retbleed_return_thunk") || + !strcmp(sym->name, "srso_safe_ret"); +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ea80b29b99134d242a7c21a901fbe43ebb233717..ffd82ee6ee13e9605bfb24b9b6835f4d74edb467 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -196,7 +196,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, return false; insn = find_insn(file, func->sec, func->offset); - if (!insn->func) + if (!insn || !insn->func) return false; func_for_each_insn(file, func, insn) { @@ -930,16 +930,33 @@ static int add_ignore_alternatives(struct objtool_file *file) return 0; } +/* + * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol + * will be added to the .retpoline_sites section. + */ __weak bool arch_is_retpoline(struct symbol *sym) { return false; } +/* + * Symbols that replace INSN_RETURN, every (tail) call to such a symbol + * will be added to the .return_sites section. + */ __weak bool arch_is_rethunk(struct symbol *sym) { return false; } +/* + * Symbols that are embedded inside other instructions, because sometimes crazy + * code exists. These are mostly ignored for validation purposes. + */ +__weak bool arch_is_embedded_insn(struct symbol *sym) +{ + return false; +} + #define NEGATIVE_RELOC ((void *)-1L) static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) @@ -1149,14 +1166,14 @@ static int add_jump_destinations(struct objtool_file *file) continue; /* - * This is a special case for zen_untrain_ret(). + * This is a special case for retbleed_untrain_ret(). 
* It jumps to __x86_return_thunk(), but objtool * can't find the thunk's starting RET * instruction, because the RET is also in the * middle of another instruction. Objtool only * knows about the outer instruction. */ - if (sym && sym->return_thunk) { + if (sym && sym->embedded_insn) { add_return_call(file, insn, false); continue; } @@ -1955,6 +1972,9 @@ static int classify_symbols(struct objtool_file *file) if (arch_is_rethunk(func)) func->return_thunk = true; + if (arch_is_embedded_insn(func)) + func->embedded_insn = true; + if (!strcmp(func->name, "__fentry__")) func->fentry = true; @@ -2063,12 +2083,17 @@ static int decode_sections(struct objtool_file *file) return 0; } -static bool is_fentry_call(struct instruction *insn) +static bool is_special_call(struct instruction *insn) { - if (insn->type == INSN_CALL && - insn->call_dest && - insn->call_dest->fentry) - return true; + if (insn->type == INSN_CALL) { + struct symbol *dest = insn->call_dest; + + if (!dest) + return false; + + if (dest->fentry) + return true; + } return false; } @@ -2942,7 +2967,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (ret) return ret; - if (!no_fp && func && !is_fentry_call(insn) && + if (!no_fp && func && !is_special_call(insn) && !has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer save/setup", sec, insn->offset); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index a1863eb35fbbcdfe1babceabdd6ab280f61cd9e8..19446d9112443912a22e91a29deb90ffe607458c 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -61,6 +61,7 @@ struct symbol { u8 return_thunk : 1; u8 fentry : 1; u8 kcov : 1; + u8 embedded_insn : 1; }; struct reloc { diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh index 831c02cf0586ceff53486ab9250840a0ec66aea5..27ee1ea1fe941fed1ec29703e220b69ee4fecbda 100755 --- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h printf "static const char *x86_MSRs[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' -egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \ sed -r "s/$regex/\2 \1/g" | sort -n | \ xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n"
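Illustration (not part of the patch): a minimal userspace sketch of how the spec_rstack_overflow sysfs file added above could be queried on a kernel carrying this series. Only the file path and the example strings come from the patch; the program itself is hypothetical.

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow", "r");

	/* A missing file simply means a kernel without this series (or no sysfs). */
	if (!f) {
		perror("spec_rstack_overflow");
		return 1;
	}

	/* Single line, e.g. "Mitigation: safe RET" or "Vulnerable: no microcode". */
	if (fgets(line, sizeof(line), f))
		fputs(line, stdout);

	fclose(f);
	return 0;
}

Run as an unprivileged user, this prints the same string documented in srso.rst and exposed via the drivers/base/cpu.c attribute above.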