From 1fa19722c59fa8b4dcd3eaff9f68d9955f1954c9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 28 Feb 2025 01:44:14 +0000 Subject: [PATCH 1/3] x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT stable inclusion from stable-v6.6.88 commit 8d25a8e275bb0b74fe84367e83f23bf8fc0de5ce category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IC8J7I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-6.6.y&id=8d25a8e275bb0b74fe84367e83f23bf8fc0de5ce -------------------------------- commit 22cc5ca5de52bbfc36a7d4a55323f91fb4492264 upstream. CONFIG_PARAVIRT_XXL is mainly defined/used by XEN PV guests. For other VM guest types, features supported under CONFIG_PARAVIRT are self sufficient. CONFIG_PARAVIRT mainly provides support for TLB flush operations and time related operations. For TDX guest as well, paravirt calls under CONFIG_PARVIRT meets most of its requirement except the need of HLT and SAFE_HLT paravirt calls, which is currently defined under CONFIG_PARAVIRT_XXL. Since enabling CONFIG_PARAVIRT_XXL is too bloated for TDX guest like platforms, move HLT and SAFE_HLT paravirt calls under CONFIG_PARAVIRT. Moving HLT and SAFE_HLT paravirt calls are not fatal and should not break any functionality for current users of CONFIG_PARAVIRT. Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Co-developed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Kirill A. Shutemov Signed-off-by: Vishal Annapurve Signed-off-by: Ingo Molnar Reviewed-by: Andi Kleen Reviewed-by: Tony Luck Reviewed-by: Juergen Gross Tested-by: Ryan Afranji Cc: Andy Lutomirski Cc: Brian Gerst Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-2-vannapurve@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jason Zeng --- arch/x86/include/asm/irqflags.h | 40 +++++++++++++++------------ arch/x86/include/asm/paravirt.h | 20 +++++++------- arch/x86/include/asm/paravirt_types.h | 3 +- arch/x86/kernel/paravirt.c | 14 ++++++---- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c5ae649d2df..9acfe2bcf1fd 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -56,6 +56,28 @@ static __always_inline void native_halt(void) #endif +#ifndef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static __always_inline void arch_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static __always_inline void halt(void) +{ + native_halt(); +} +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + #ifdef CONFIG_PARAVIRT_XXL #include #else @@ -77,24 +99,6 @@ static __always_inline void arch_local_irq_enable(void) native_irq_enable(); } -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static __always_inline void arch_safe_halt(void) -{ - native_safe_halt(); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static __always_inline void halt(void) -{ - native_halt(); -} - /* * For spinlocks, etc: */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6c8ff12140ae..d8537e30cee1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -103,6 +103,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn, PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); } +static __always_inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); +} + #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { @@ -168,16 +178,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -static __always_inline void arch_safe_halt(void) -{ - PVOP_VCALL0(irq.safe_halt); -} - -static inline void halt(void) -{ - PVOP_VCALL0(irq.halt); -} - extern noinstr void pv_native_wbinvd(void); static __always_inline void wbinvd(void) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 772d03487520..4149df559aae 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -130,10 +130,9 @@ struct pv_irq_ops { struct paravirt_callee_save save_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; - +#endif void (*safe_halt)(void); void (*halt)(void); -#endif } __no_randomize_layout; struct pv_mmu_ops { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8d51c86caa41..234851fe0ef8 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -142,6 +142,11 @@ int paravirt_disable_iospace(void) return request_resource(&ioport_resource, &reserve_ioports); } +static noinstr void pv_native_safe_halt(void) +{ + native_safe_halt(); +} + #ifdef CONFIG_PARAVIRT_XXL static noinstr void pv_native_write_cr2(unsigned long val) { @@ -162,11 +167,6 @@ noinstr void pv_native_wbinvd(void) { native_wbinvd(); } - -static noinstr void pv_native_safe_halt(void) -{ - native_safe_halt(); -} #endif struct pv_info pv_info = { @@ -224,9 +224,11 @@ struct paravirt_patch_template pv_ops = { .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), +#endif /* CONFIG_PARAVIRT_XXL */ + + /* Irq HLT ops. */ .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, -#endif /* CONFIG_PARAVIRT_XXL */ /* Mmu ops. */ .mmu.flush_tlb_user = native_flush_tlb_local, -- Gitee From e69c02114459574092647fb7bb6e5edc4f9a7c92 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Fri, 28 Feb 2025 01:44:15 +0000 Subject: [PATCH 2/3] x86/tdx: Fix arch_safe_halt() execution for TDX VMs stable inclusion from stable-v6.6.88 commit 29f040d4efb08497165a695f40d7049bd35f846b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IC8J7I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=29f040d4efb08497165a695f40d7049bd35f846b -------------------------------- commit 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 upstream. Direct HLT instruction execution causes #VEs for TDX VMs which is routed to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE handler will enable interrupts before TDCALL is routed to hypervisor leading to missed wakeup events, as current TDX spec doesn't expose interruptibility state information to allow #VE handler to selectively enable interrupts. Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") prevented the idle routines from executing HLT instruction in STI-shadow. But it missed the paravirt routine which can be reached via this path as an example: kvm_wait() => safe_halt() => raw_safe_halt() => arch_safe_halt() => irq.safe_halt() => pv_native_safe_halt() To reliably handle arch_safe_halt() for TDX VMs, introduce explicit dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt() routines with TDX-safe versions that execute direct TDCALL and needed interrupt flag updates. Executing direct TDCALL brings in additional benefit of avoiding HLT related #VEs altogether. As tested by Ryan Afranji: "Tested with the specjbb2015 benchmark. It has heavy lock contention which leads to many halt calls. TDX VMs suffered a poor score before this patchset. Verified the major performance improvement with this patchset applied." Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests") Signed-off-by: Vishal Annapurve Signed-off-by: Ingo Molnar Reviewed-by: Kirill A. Shutemov Tested-by: Ryan Afranji Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jason Zeng --- arch/x86/Kconfig | 1 + arch/x86/coco/tdx/tdx.c | 26 +++++++++++++++++++++++++- arch/x86/include/asm/tdx.h | 4 ++-- arch/x86/kernel/process.c | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7efe93e4e771..d42298b67adb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -889,6 +889,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 2f67e196a2ea..98d0ee9600eb 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -334,7 +335,7 @@ static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); } -void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false; @@ -345,6 +346,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } +static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { @@ -888,6 +899,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT related #VEs + * in addition to having a reliable halt logic execution. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 603e6d1e9d4a..c632f09f0c97 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -46,7 +46,7 @@ void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); -void tdx_safe_halt(void); +void tdx_halt(void); bool tdx_early_handle_ve(struct pt_regs *regs); @@ -55,7 +55,7 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); #else static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { }; static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5351f293f770..64128a501446 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -950,7 +950,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else static_call_update(x86_idle, default_idle); } -- Gitee From 9f9bf6bdb75bd445e1d91936741ae2f1c1765f2a Mon Sep 17 00:00:00 2001 From: Jason Zeng Date: Mon, 30 Jun 2025 16:18:18 +0800 Subject: [PATCH 3/3] Fix kabi for TDX halt/safe-halt Intel inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IC8J7I CVE: NA ---------------------------------------- Since default openEuler config doesn't turn on CONFIG_PARAVIRT_XXL, struct pv_irq_ops is actually an empty structure. So we can move it to the end of struct paravirt_patch_template without changing offset of any fields, thus makes no change to kabi. Meanwhile, after moving to end, when we open the 2 fields (halt/safe_halt), they don't impact offsets of any fields in struct paravirt_patch_template either. Thus overall this change is kabi safe. Fixes: 1fa19722c59f ("x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT") Signed-off-by: Jason Zeng --- arch/x86/include/asm/paravirt_types.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 4149df559aae..99208ed1dd64 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H +#include + #ifndef __ASSEMBLY__ /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch_site { @@ -118,6 +120,24 @@ struct pv_cpu_ops { #endif } __no_randomize_layout; +#if defined(__GENKSYMS__) +struct pv_irq_ops { +#ifdef CONFIG_PARAVIRT_XXL + /* + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. + */ + struct paravirt_callee_save save_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + void (*safe_halt)(void); + void (*halt)(void); +#endif +} __no_randomize_layout; +#else struct pv_irq_ops { #ifdef CONFIG_PARAVIRT_XXL /* @@ -134,6 +154,7 @@ struct pv_irq_ops { void (*safe_halt)(void); void (*halt)(void); } __no_randomize_layout; +#endif struct pv_mmu_ops { /* TLB operations */ @@ -241,9 +262,10 @@ struct pv_lock_ops { * what to patch. */ struct paravirt_patch_template { struct pv_cpu_ops cpu; - struct pv_irq_ops irq; + KABI_BROKEN_REMOVE(struct pv_irq_ops irq) struct pv_mmu_ops mmu; struct pv_lock_ops lock; + KABI_EXTEND(struct pv_irq_ops irq) } __no_randomize_layout; extern struct pv_info pv_info; -- Gitee