diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7efe93e4e771285b4967eaf8a9a7b683da314ef0..d42298b67adb19f16c0eed2f2afe366e7d8f3171 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -889,6 +889,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 2f67e196a2ead2ab334d3c2a72055bc94c20eeb9..98d0ee9600eb54fe29fae068182f3f297a3c44a9 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -334,7 +335,7 @@ static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); } -void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false; @@ -345,6 +346,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } +static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_hypercall_args args = { @@ -888,6 +899,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; + /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT related #VEs + * in addition to having a reliable halt logic execution. 
+ */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8c5ae649d2df82999815998c5ba92ba044dfbea9..9acfe2bcf1fd5bbfb6f24e3a606200e9593b4155 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -56,6 +56,28 @@ static __always_inline void native_halt(void) #endif +#ifndef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static __always_inline void arch_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static __always_inline void halt(void) +{ + native_halt(); +} +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + #ifdef CONFIG_PARAVIRT_XXL #include #else @@ -77,24 +99,6 @@ static __always_inline void arch_local_irq_enable(void) native_irq_enable(); } -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static __always_inline void arch_safe_halt(void) -{ - native_safe_halt(); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static __always_inline void halt(void) -{ - native_halt(); -} - /* * For spinlocks, etc: */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 6c8ff12140aea9a860ccd229c1bf74acc7948b07..d8537e30cee192b2eac6a4d5a84143e0047d01a4 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -103,6 +103,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn, PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc); } +static __always_inline void arch_safe_halt(void) +{ + PVOP_VCALL0(irq.safe_halt); +} + +static inline void halt(void) +{ + PVOP_VCALL0(irq.halt); 
+} + #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { @@ -168,16 +178,6 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -static __always_inline void arch_safe_halt(void) -{ - PVOP_VCALL0(irq.safe_halt); -} - -static inline void halt(void) -{ - PVOP_VCALL0(irq.halt); -} - extern noinstr void pv_native_wbinvd(void); static __always_inline void wbinvd(void) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 772d03487520e595ba458ae9032f8a0a2dc00101..99208ed1dd64979a45ff370459d37fdee93e2fbe 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H +#include + #ifndef __ASSEMBLY__ /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch_site { @@ -118,6 +120,7 @@ struct pv_cpu_ops { #endif } __no_randomize_layout; +#if defined(__GENKSYMS__) struct pv_irq_ops { #ifdef CONFIG_PARAVIRT_XXL /* @@ -130,11 +133,28 @@ struct pv_irq_ops { struct paravirt_callee_save save_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; - void (*safe_halt)(void); void (*halt)(void); #endif } __no_randomize_layout; +#else +struct pv_irq_ops { +#ifdef CONFIG_PARAVIRT_XXL + /* + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. + * + * NOTE: These functions' callers expect the callee to preserve + * more registers than the standard C calling convention. + */ + struct paravirt_callee_save save_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; +#endif + void (*safe_halt)(void); + void (*halt)(void); +} __no_randomize_layout; +#endif struct pv_mmu_ops { /* TLB operations */ @@ -242,9 +262,10 @@ struct pv_lock_ops { * what to patch. 
*/ struct paravirt_patch_template { struct pv_cpu_ops cpu; - struct pv_irq_ops irq; + KABI_BROKEN_REMOVE(struct pv_irq_ops irq) struct pv_mmu_ops mmu; struct pv_lock_ops lock; + KABI_EXTEND(struct pv_irq_ops irq) } __no_randomize_layout; extern struct pv_info pv_info; diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 603e6d1e9d4aac6e0e145f8832ce2a52483a6910..c632f09f0c9721ccb63007b431d22e5d3d576e79 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -46,7 +46,7 @@ void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); -void tdx_safe_halt(void); +void tdx_halt(void); bool tdx_early_handle_ve(struct pt_regs *regs); @@ -55,7 +55,7 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); #else static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { }; static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8d51c86caa415fb76eaf87452fe3a684575de4d9..234851fe0ef8e7e499f4a33bff9e245fc04d092f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -142,6 +142,11 @@ int paravirt_disable_iospace(void) return request_resource(&ioport_resource, &reserve_ioports); } +static noinstr void pv_native_safe_halt(void) +{ + native_safe_halt(); +} + #ifdef CONFIG_PARAVIRT_XXL static noinstr void pv_native_write_cr2(unsigned long val) { @@ -162,11 +167,6 @@ noinstr void pv_native_wbinvd(void) { native_wbinvd(); } - -static noinstr void pv_native_safe_halt(void) -{ - native_safe_halt(); -} #endif struct pv_info pv_info = { @@ -224,9 +224,11 @@ struct paravirt_patch_template pv_ops = { .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), +#endif /* CONFIG_PARAVIRT_XXL 
*/ + + /* Irq HLT ops. */ .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, -#endif /* CONFIG_PARAVIRT_XXL */ /* Mmu ops. */ .mmu.flush_tlb_user = native_flush_tlb_local, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5351f293f770b5e9e8e6a0ab0cb0399c62a98893..64128a5014467c6b59b791944947a476cd0486a9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -950,7 +950,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else static_call_update(x86_idle, default_idle); }