From 8a9d86dc2e6ddc59fbe9950e7d54eafc8c9ff5ef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:46 -0700 Subject: [PATCH 01/11] KVM: WARN if there are dangling MMU invalidations at VM destruction mainline inclusion from mainline-v6.8-rc1 commit d497a0fab8b8457214fcc9b1a39530920ea7e95e category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/d497a0fab8b8457214fcc9b1a39530920ea7e95e -------------------------------- commit d497a0fab8b8457214fcc9b1a39530920ea7e95e upstream Add an assertion that there are no in-progress MMU invalidations when a VM is being destroyed, with the exception of the scenario where KVM unregisters its MMU notifier between an .invalidate_range_start() call and the corresponding .invalidate_range_end(). KVM can't detect unpaired calls from the mmu_notifier due to the above exception waiver, but the assertion can detect KVM bugs, e.g. such as the bug that *almost* escaped initial guest_memfd development. Link: https://lore.kernel.org/all/e397d30c-c6af-e68f-d18e-b4e3739c5389@linux.intel.com Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Message-Id: <20231027182217.3615211-5-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- virt/kvm/kvm_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9e351bce483e..20be172ed356 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1364,9 +1364,16 @@ static void kvm_destroy_vm(struct kvm *kvm) * No threads can be waiting in kvm_swap_active_memslots() as the * last reference on KVM has been dropped, but freeing * memslots would deadlock without this manual intervention. + * + * If the count isn't unbalanced, i.e. KVM did NOT unregister its MMU + * notifier between a start() and end(), then there shouldn't be any + * in-progress invalidations. */ WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait)); - kvm->mn_active_invalidate_count = 0; + if (kvm->mn_active_invalidate_count) + kvm->mn_active_invalidate_count = 0; + else + WARN_ON(kvm->mmu_invalidate_in_progress); #else kvm_flush_shadow_all(kvm); #endif -- Gitee From 74d5130c0123389c7f2c14bf1a8ce6b1e710e8e7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:47 -0700 Subject: [PATCH 02/11] KVM: PPC: Drop dead code related to KVM_ARCH_WANT_MMU_NOTIFIER mainline inclusion from mainline-v6.8-rc1 commit 1853d7502a19b34b2cfe4ea0698bee73323fec3a category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/1853d7502a19b34b2cfe4ea0698bee73323fec3a -------------------------------- commit 1853d7502a19b34b2cfe4ea0698bee73323fec3a upstream Assert that both KVM_ARCH_WANT_MMU_NOTIFIER and CONFIG_MMU_NOTIFIER are defined when KVM is enabled, and return '1' unconditionally for the CONFIG_KVM_BOOK3S_HV_POSSIBLE=n path. All flavors of PPC support for KVM select MMU_NOTIFIER, and KVM_ARCH_WANT_MMU_NOTIFIER is unconditionally defined by arch/powerpc/include/asm/kvm_host.h. Effectively dropping use of KVM_ARCH_WANT_MMU_NOTIFIER will simplify a future cleanup to turn KVM_ARCH_WANT_MMU_NOTIFIER into a Kconfig, i.e. will allow combining all of the #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) checks into a single #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER without having to worry about PPC's "bare" usage of KVM_ARCH_WANT_MMU_NOTIFIER. Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Reviewed-by: Fuad Tabba Message-Id: <20231027182217.3615211-6-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/powerpc/kvm/powerpc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6cef200c2404..9e4507668de1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -632,12 +632,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif case KVM_CAP_SYNC_MMU: +#if !defined(CONFIG_MMU_NOTIFIER) || !defined(KVM_ARCH_WANT_MMU_NOTIFIER) + BUILD_BUG(); +#endif #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE r = hv_enabled; -#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) - r = 1; #else - r = 0; + r = 1; #endif break; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -- Gitee From e723068f55a65341d3dfe52c07e0a822e5a666a5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:48 -0700 Subject: [PATCH 03/11] KVM: PPC: Return '1' unconditionally for KVM_CAP_SYNC_MMU mainline inclusion from mainline-v6.8-rc1 commit 4a2e993faad3880962540ab8e3b68f22e48b18e8 category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/4a2e993faad3880962540ab8e3b68f22e48b18e8 -------------------------------- commit 4a2e993faad3880962540ab8e3b68f22e48b18e8 upstream Advertise that KVM's MMU is synchronized with the primary MMU for all flavors of PPC KVM support, i.e. advertise that the MMU is synchronized when CONFIG_KVM_BOOK3S_HV_POSSIBLE=y but the VM is not using hypervisor mode (a.k.a. PR VMs). PR VMs, via kvm_unmap_gfn_range_pr(), do the right thing for mmu_notifier invalidation events, and more tellingly, KVM returns '1' for KVM_CAP_SYNC_MMU when CONFIG_KVM_BOOK3S_HV_POSSIBLE=n and CONFIG_KVM_BOOK3S_PR_POSSIBLE=y, i.e. KVM already advertises a synchronized MMU for PR VMs, just not when CONFIG_KVM_BOOK3S_HV_POSSIBLE=y. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-7-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/powerpc/kvm/powerpc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9e4507668de1..cb47fd487a12 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -635,11 +635,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #if !defined(CONFIG_MMU_NOTIFIER) || !defined(KVM_ARCH_WANT_MMU_NOTIFIER) BUILD_BUG(); #endif -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - r = hv_enabled; -#else r = 1; -#endif break; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_HTAB_FD: -- Gitee From c1c9d7f481d704659571217993bba093b4f13353 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:49 -0700 Subject: [PATCH 04/11] KVM: Convert KVM_ARCH_WANT_MMU_NOTIFIER to CONFIG_KVM_GENERIC_MMU_NOTIFIER mainline inclusion from mainline-v6.8-rc1 commit f128cf8cfbecccf95e891ae90d9c917df5117c7a category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/f128cf8cfbecccf95e891ae90d9c917df5117c7a -------------------------------- commit f128cf8cfbecccf95e891ae90d9c917df5117c7a upstream Convert KVM_ARCH_WANT_MMU_NOTIFIER into a Kconfig and select it where appropriate to effectively maintain existing behavior. Using a proper Kconfig will simplify building more functionality on top of KVM's mmu_notifier infrastructure. Add a forward declaration of kvm_gfn_range to kvm_types.h so that including arch/powerpc/include/asm/kvm_ppc.h's with CONFIG_KVM=n doesn't generate warnings due to kvm_gfn_range being undeclared. PPC defines hooks for PR vs. HV without guarding them via #ifdeffery, e.g. bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); bool (*set_spte_gfn)(struct kvm *kvm, struct kvm_gfn_range *range); Alternatively, PPC could forward declare kvm_gfn_range, but there's no good reason not to define it in common KVM. Acked-by: Anup Patel Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Message-Id: <20231027182217.3615211-8-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/kvm/Kconfig | 2 +- arch/loongarch/include/asm/kvm_host.h | 1 - arch/loongarch/kvm/Kconfig | 2 +- arch/mips/include/asm/kvm_host.h | 2 -- arch/mips/kvm/Kconfig | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 -- arch/powerpc/kvm/Kconfig | 8 ++++---- arch/powerpc/kvm/powerpc.c | 4 +--- arch/riscv/include/asm/kvm_host.h | 2 -- arch/riscv/kvm/Kconfig | 2 +- arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/Kconfig | 2 +- include/linux/kvm_host.h | 6 +++--- include/linux/kvm_types.h | 1 + virt/kvm/Kconfig | 4 ++++ virt/kvm/kvm_main.c | 10 +++++----- 17 files changed, 23 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c51ae33acc4b..a4b77ff046f8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1033,8 +1033,6 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); -#define KVM_ARCH_WANT_MMU_NOTIFIER - void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 52edbd7f6340..fc88494beed0 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -23,7 +23,7 @@ menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select KVM_GENERIC_HARDWARE_ENABLING - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 4da029a93c68..274217aba8a3 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -298,7 +298,6 @@ void kvm_flush_tlb_all(void); void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa); int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write); -#define KVM_ARCH_WANT_MMU_NOTIFIER void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig index e899d96f4da6..283871e15e41 100644 --- a/arch/loongarch/kvm/Kconfig +++ b/arch/loongarch/kvm/Kconfig @@ -30,10 +30,10 @@ config KVM select HAVE_KVM_MSI select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_GENERIC_HARDWARE_ENABLING + select KVM_GENERIC_MMU_NOTIFIER select KVM_MMIO select KVM_XFER_TO_GUEST_WORK select SCHED_INFO - select MMU_NOTIFIER select PREEMPT_NOTIFIERS select KVM_VFIO help diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 54a85f1d4f2c..179f320cc231 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -810,8 +810,6 @@ int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn); pgd_t *kvm_pgd_alloc(void); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); -#define KVM_ARCH_WANT_MMU_NOTIFIER - /* Emulation */ enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index a8cdba75f98d..c04987d2ed2e 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -25,7 +25,7 @@ config KVM select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_MMIO - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER select INTERVAL_TREE select KVM_GENERIC_HARDWARE_ENABLING help diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 14ee0dece853..4b5c3f2acf78 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -62,8 +62,6 @@ #include -#define KVM_ARCH_WANT_MMU_NOTIFIER - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 902611954200..b33358ee6424 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -42,7 +42,7 @@ config KVM_BOOK3S_64_HANDLER config KVM_BOOK3S_PR_POSSIBLE bool select KVM_MMIO - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER config KVM_BOOK3S_HV_POSSIBLE bool @@ -85,7 +85,7 @@ config KVM_BOOK3S_64_HV tristate "KVM for POWER7 and later using hypervisor mode in host" depends on KVM_BOOK3S_64 && PPC_POWERNV select KVM_BOOK3S_HV_POSSIBLE - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER select CMA help Support running unmodified book3s_64 guest kernels in @@ -194,7 +194,7 @@ config KVM_E500V2 depends on !CONTEXT_TRACKING_USER select KVM select KVM_MMIO - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER help Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -211,7 +211,7 @@ config KVM_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER help Support running unmodified E500MC/E5500/E6500 guest kernels in virtual machines on E500MC/E5500/E6500 host processors. diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cb47fd487a12..d867b209cc95 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -632,9 +632,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif case KVM_CAP_SYNC_MMU: -#if !defined(CONFIG_MMU_NOTIFIER) || !defined(KVM_ARCH_WANT_MMU_NOTIFIER) - BUILD_BUG(); -#endif + BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER)); r = 1; break; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 459e61ad7d2b..efad04b22b6a 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -250,8 +250,6 @@ struct kvm_vcpu_arch { static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} -#define KVM_ARCH_WANT_MMU_NOTIFIER - #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index dfc237d7875b..ae2e05f050ec 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,7 +30,7 @@ config KVM select KVM_GENERIC_HARDWARE_ENABLING select KVM_MMIO select KVM_XFER_TO_GUEST_WORK - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER select PREEMPT_NOTIFIERS help Support hosting virtualized guest machines. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d27a280daf5c..d254766ed650 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2163,8 +2163,6 @@ enum { # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) #endif -#define KVM_ARCH_WANT_MMU_NOTIFIER - int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_cpu_has_extint(struct kvm_vcpu *v); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index d926a24c9632..c5d5fe069a64 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -24,7 +24,7 @@ config KVM depends on HIGH_RES_TIMERS depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS - select MMU_NOTIFIER + select KVM_GENERIC_MMU_NOTIFIER select HAVE_KVM_IRQCHIP select HAVE_KVM_PFNCACHE select HAVE_KVM_IRQFD diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a5cebf76aaa5..58dfb410bc7b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -254,7 +254,7 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -#ifdef KVM_ARCH_WANT_MMU_NOTIFIER +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { pte_t pte; }; @@ -833,7 +833,7 @@ struct kvm { struct hlist_head irq_ack_notifier_list; #endif -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; @@ -2094,7 +2094,7 @@ extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 6f4737d5046a..9d1f7835d8c1 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -6,6 +6,7 @@ struct kvm; struct kvm_async_pf; struct kvm_device_ops; +struct kvm_gfn_range; struct kvm_interrupt; struct kvm_irq_routing_table; struct kvm_memory_slot; diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 484d0873061c..ecae2914c97e 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -92,3 +92,7 @@ config HAVE_KVM_PM_NOTIFIER config KVM_GENERIC_HARDWARE_ENABLING bool + +config KVM_GENERIC_MMU_NOTIFIER + select MMU_NOTIFIER + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 20be172ed356..362779da35e5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -548,7 +548,7 @@ void kvm_destroy_vcpus(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_destroy_vcpus); -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) { return container_of(mn, struct kvm, mmu_notifier); @@ -952,14 +952,14 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) return mmu_notifier_register(&kvm->mmu_notifier, current->mm); } -#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ +#else /* !CONFIG_KVM_GENERIC_MMU_NOTIFIER */ static int kvm_init_mmu_notifier(struct kvm *kvm) { return 0; } -#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ +#endif /* CONFIG_KVM_GENERIC_MMU_NOTIFIER */ #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER static int kvm_pm_notifier_call(struct notifier_block *bl, @@ -1279,7 +1279,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) out_err_no_debugfs: kvm_coalesced_mmio_free(kvm); out_no_coalesced_mmio: -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER if (kvm->mmu_notifier.ops) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); #endif @@ -1355,7 +1355,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); -#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) +#ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); /* * At this point, pending calls to invalidate_range_start() -- Gitee From a60178ead8f02e1a1d39b225a49fd24e3f1bf38b Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 15 Aug 2023 00:08:37 +0200 Subject: [PATCH 05/11] KVM: Correct kvm_vcpu_event(s) typo in KVM API documentation mainline inclusion from mainline-v6.7-rc1 commit 57f33f1a8756c980acb335852e802e656be8aec0 category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/57f33f1a8756c980acb335852e802e656be8aec0 -------------------------------- commit 57f33f1a8756c980acb335852e802e656be8aec0 upstream Set KVM_GET_VCPU_EVENTS and KVM_SET_VCPU_EVENTS parameter type to `struct kvm_vcpu_events`. Events, plural. Opportunistically fix few other typos. Signed-off-by: Michal Luczaj Link: https://lore.kernel.org/r/20230814222358.707877-4-mhal@rbox.co Signed-off-by: Sean Christopherson Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/virt/kvm/api.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c40b6a09ffd8..b3328eb03ce2 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -559,7 +559,7 @@ ioctl is useful if the in-kernel PIC is not used. PPC: ^^^^ -Queues an external interrupt to be injected. This ioctl is overleaded +Queues an external interrupt to be injected. This ioctl is overloaded with 3 different irq values: a) KVM_INTERRUPT_SET @@ -1010,7 +1010,7 @@ be set in the flags field of this ioctl: The KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag requests KVM to generate the contents of the hypercall page automatically; hypercalls will be intercepted and passed to userspace through KVM_EXIT_XEN. In this -ase, all of the blob size and address fields must be zero. +case, all of the blob size and address fields must be zero. The KVM_XEN_HVM_CONFIG_EVTCHN_SEND flag indicates to KVM that userspace will always use the KVM_XEN_HVM_EVTCHN_SEND ioctl to deliver event @@ -1115,7 +1115,7 @@ Other flags returned by ``KVM_GET_CLOCK`` are accepted but ignored. :Extended by: KVM_CAP_INTR_SHADOW :Architectures: x86, arm64 :Type: vcpu ioctl -:Parameters: struct kvm_vcpu_event (out) +:Parameters: struct kvm_vcpu_events (out) :Returns: 0 on success, -1 on error X86: @@ -1238,7 +1238,7 @@ directly to the virtual CPU). :Extended by: KVM_CAP_INTR_SHADOW :Architectures: x86, arm64 :Type: vcpu ioctl -:Parameters: struct kvm_vcpu_event (in) +:Parameters: struct kvm_vcpu_events (in) :Returns: 0 on success, -1 on error X86: @@ -3135,7 +3135,7 @@ as follow:: }; An entry with a "page_shift" of 0 is unused. Because the array is -organized in increasing order, a lookup can stop when encoutering +organized in increasing order, a lookup can stop when encountering such an entry. The "slb_enc" field provides the encoding to use in the SLB for the @@ -3527,7 +3527,7 @@ Possible features: - KVM_RUN and KVM_GET_REG_LIST are not available; - KVM_GET_ONE_REG and KVM_SET_ONE_REG cannot be used to access - the scalable archietctural SVE registers + the scalable architectural SVE registers KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() or KVM_REG_ARM64_SVE_FFR; @@ -4478,7 +4478,7 @@ This will have undefined effects on the guest if it has not already placed itself in a quiescent state where no vcpu will make MMU enabled memory accesses. -On succsful completion, the pending HPT will become the guest's active +On successful completion, the pending HPT will become the guest's active HPT and the previous HPT will be discarded. On failure, the guest will still be operating on its previous HPT. @@ -5094,7 +5094,7 @@ before the vcpu is fully usable. Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be configured by use of ioctls such as KVM_SET_ONE_REG. The exact configuration -that should be performaned and how to do it are feature-dependent. +that should be performed and how to do it are feature-dependent. Other calls that depend on a particular feature being finalized, such as KVM_RUN, KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with @@ -5553,7 +5553,7 @@ KVM_XEN_ATTR_TYPE_EVTCHN from the guest. A given sending port number may be directed back to a specified vCPU (by APIC ID) / port / priority on the guest, or to trigger events on an eventfd. The vCPU and priority can be changed - by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, but but other + by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, but other fields cannot change for a given sending port. A port mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags field. Passing KVM_XEN_EVTCHN_RESET in the flags field removes all interception of -- Gitee From 59b602dc1cb019d1107cd2c6e7084e3b5d59ff25 Mon Sep 17 00:00:00 2001 From: Jinrong Liang Date: Wed, 31 May 2023 15:50:52 +0800 Subject: [PATCH 06/11] KVM: x86/pmu: Add documentation for fixed ctr on PMU filter mainline inclusion from mainline-v6.7-rc1 commit b35babd3abea081de0611ce0d5b85281c18c52c7 category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/b35babd3abea081de0611ce0d5b85281c18c52c7 -------------------------------- commit b35babd3abea081de0611ce0d5b85281c18c52c7 upstream Update the documentation for the KVM_SET_PMU_EVENT_FILTER ioctl to include a detailed description of how fixed performance events are handled in the pmu filter. The action and fixed_counter_bitmap members of the pmu filter to determine whether fixed performance events can be programmed by the guest. This information is helpful for correctly configuring the fixed_counter_bitmap and action fields to filter fixed performance events. Suggested-by: Like Xu Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202304150850.rx4UDDsB-lkp@intel.com Signed-off-by: Jinrong Liang Link: https://lore.kernel.org/r/20230531075052.43239-1-cloudliang@tencent.com Signed-off-by: Sean Christopherson Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/virt/kvm/api.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index b3328eb03ce2..47f983ac9862 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5202,6 +5202,24 @@ Valid values for 'action':: #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +Via this API, KVM userspace can also control the behavior of the VM's fixed +counters (if any) by configuring the "action" and "fixed_counter_bitmap" fields. + +Specifically, KVM follows the following pseudo-code when determining whether to +allow the guest FixCtr[i] to count its pre-defined fixed event:: + + FixCtr[i]_is_allowed = (action == ALLOW) && (bitmap & BIT(i)) || + (action == DENY) && !(bitmap & BIT(i)); + FixCtr[i]_is_denied = !FixCtr[i]_is_allowed; + +KVM always consumes fixed_counter_bitmap, it's userspace's responsibility to +ensure fixed_counter_bitmap is set correctly, e.g. if userspace wants to define +a filter that only affects general purpose counters. + +Note, the "events" field also applies to fixed counters' hardcoded event_select +and unit_mask values. "fixed_counter_bitmap" has higher priority than "events" +if there is a contradiction between the two. + 4.121 KVM_PPC_SVM_OFF --------------------- -- Gitee From 100fed42b59a9756334fb1dd451b1fc29a062898 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 3 Oct 2023 23:03:57 +0000 Subject: [PATCH 07/11] KVM: arm64: Allow userspace to get the writable masks for feature ID registers mainline inclusion from mainline-v6.7-rc1 commit 3f9cd0ca848413fd368278310d2cdd6c2bef48b2 category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/3f9cd0ca848413fd368278310d2cdd6c2bef48b2 -------------------------------- commit 3f9cd0ca848413fd368278310d2cdd6c2bef48b2 upstream While the Feature ID range is well defined and pretty large, it isn't inconceivable that the architecture will eventually grow some other ranges that will need to similarly be described to userspace. Add a VM ioctl to allow userspace to get writable masks for feature ID registers in below system register space: op0 = 3, op1 = {0, 1, 3}, CRn = 0, CRm = {0 - 7}, op2 = {0 - 7} This is used to support mix-and-match userspace and kernels for writable ID registers, where userspace may want to know upfront whether it can actually tweak the contents of an idreg or not. Add a new capability (KVM_CAP_ARM_SUPPORTED_FEATURE_ID_RANGES) that returns a bitmap of the valid ranges, which can subsequently be retrieved, one at a time by setting the index of the set bit as the range identifier. Suggested-by: Marc Zyngier Suggested-by: Cornelia Huck Signed-off-by: Jing Zhang Reviewed-by: Cornelia Huck Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20231003230408.3405722-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/include/uapi/asm/kvm.h | 32 +++++++++++++++ arch/arm64/kvm/arm.c | 10 +++++ arch/arm64/kvm/sys_regs.c | 66 +++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 + 5 files changed, 112 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a4b77ff046f8..0a4a4bf8d939 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1241,6 +1241,8 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, struct kvm_arm_copy_mte_tags *copy_tags); int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, struct kvm_arm_counter_offset *offset); +int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, + struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 47eea5041899..871b751732f5 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -562,6 +562,38 @@ struct kvm_smccc_filter { #define KVM_HYPERCALL_EXIT_SMC (1U << 0) #define KVM_HYPERCALL_EXIT_16BIT (1U << 1) +/* + * Get feature ID registers userspace writable mask. + * + * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model + * Feature Register 2"): + * + * "The Feature ID space is defined as the System register space in + * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, + * op2=={0-7}." + * + * This covers all currently known R/O registers that indicate + * anything useful feature wise, including the ID registers. + * + * If we ever need to introduce a new range, it will be described as + * such in the range field. + */ +#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ + ({ \ + __u64 __op1 = (op1) & 3; \ + __op1 -= (__op1 == 3); \ + (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ + }) + +#define KVM_ARM_FEATURE_ID_RANGE 0 +#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + +struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d7511196e607..3be3c6a3ed96 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -526,6 +526,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: r = kvm_supported_block_sizes(); break; + case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES: + r = BIT(0); + break; #ifdef CONFIG_VIRT_PLAT_DEV case KVM_CAP_ARM_VIRT_MSI_BYPASS: r = sdev_enable; @@ -2066,6 +2069,13 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return kvm_vm_set_attr(kvm, &attr); } + case KVM_ARM_GET_REG_WRITABLE_MASKS: { + struct reg_mask_range range; + + if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); + } #ifdef CONFIG_VIRT_PLAT_DEV case KVM_CREATE_SHADOW_DEV: { struct kvm_master_dev_info *mdi; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 240c8caf0f0a..33ceb0bcf7de 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1455,6 +1455,13 @@ static inline bool is_id_reg(u32 id) sys_reg_CRm(id) < 8); } +static inline bool is_aa32_id_reg(u32 id) +{ + return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && + sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && + sys_reg_CRm(id) <= 3); +} + static unsigned int id_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { @@ -3816,6 +3823,65 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(vcpu, uindices); } +#define KVM_ARM_FEATURE_ID_RANGE_INDEX(r) \ + KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(r), \ + sys_reg_Op1(r), \ + sys_reg_CRn(r), \ + sys_reg_CRm(r), \ + sys_reg_Op2(r)) + +static bool is_feature_id_reg(u32 encoding) +{ + return (sys_reg_Op0(encoding) == 3 && + (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) && + sys_reg_CRn(encoding) == 0 && + sys_reg_CRm(encoding) <= 7); +} + +int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + u64 __user *masks = (u64 __user *)range->addr; + + /* Only feature id range is supported, reserved[13] must be zero. */ + if (range->range || + memcmp(range->reserved, zero_page, sizeof(range->reserved))) + return -EINVAL; + + /* Wipe the whole thing first */ + if (clear_user(masks, KVM_ARM_FEATURE_ID_RANGE_SIZE * sizeof(__u64))) + return -EFAULT; + + for (int i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) { + const struct sys_reg_desc *reg = &sys_reg_descs[i]; + u32 encoding = reg_to_encoding(reg); + u64 val; + + if (!is_feature_id_reg(encoding) || !reg->set_user) + continue; + + /* + * For ID registers, we return the writable mask. Other feature + * registers return a full 64bit mask. That's not necessary + * compliant with a given revision of the architecture, but the + * RES0/RES1 definitions allow us to do that. + */ + if (is_id_reg(encoding)) { + if (!reg->val || + (is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0())) + continue; + val = reg->val; + } else { + val = ~0UL; + } + + if (put_user(val, (masks + KVM_ARM_FEATURE_ID_RANGE_INDEX(encoding)))) + return -EFAULT; + } + + return 0; +} + int __init kvm_sys_reg_table_init(void) { struct sys_reg_params params; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 51da8fe01061..f3abdeac3dd2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1218,6 +1218,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_RME 300 +#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 @@ -1652,6 +1653,7 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* Available with KVM_CAP_COUNTER_OFFSET */ #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) +#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) /* ioctl for SW vcpu init*/ #define KVM_SW64_VCPU_INIT _IO(KVMIO, 0xba) -- Gitee From 5041e8a98127e803886f9b93493bda5e2568305e Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 3 Oct 2023 23:03:58 +0000 Subject: [PATCH 08/11] KVM: arm64: Document KVM_ARM_GET_REG_WRITABLE_MASKS mainline inclusion from mainline-v6.7-rc1 commit 6656cda0f3b269cb87cfb5773d3369e6d96e83db category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/6656cda0f3b269cb87cfb5773d3369e6d96e83db -------------------------------- commit 6656cda0f3b269cb87cfb5773d3369e6d96e83db upstream Add some basic documentation on how to get feature ID register writable masks from userspace. Signed-off-by: Jing Zhang Reviewed-by: Cornelia Huck Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20231003230408.3405722-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/virt/kvm/api.rst | 48 ++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 47f983ac9862..df30407bef62 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6166,6 +6166,54 @@ writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG interface. No error will be returned, but the resulting offset will not be applied. +4.139 KVM_ARM_GET_REG_WRITABLE_MASKS +------------------------------------------- + +:Capability: KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES +:Architectures: arm64 +:Type: vm ioctl +:Parameters: struct reg_mask_range (in/out) +:Returns: 0 on success, < 0 on error + + +:: + + #define KVM_ARM_FEATURE_ID_RANGE 0 + #define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + + struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; + }; + +This ioctl copies the writable masks for a selected range of registers to +userspace. + +The ``addr`` field is a pointer to the destination array where KVM copies +the writable masks. + +The ``range`` field indicates the requested range of registers. +``KVM_CHECK_EXTENSION`` for the ``KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES`` +capability returns the supported ranges, expressed as a set of flags. Each +flag's bit index represents a possible value for the ``range`` field. +All other values are reserved for future use and KVM may return an error. + +The ``reserved[13]`` array is reserved for future use and should be 0, or +KVM may return an error. + +KVM_ARM_FEATURE_ID_RANGE (0) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Feature ID range is defined as the AArch64 System register space with +op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, op2=={0-7}. + +The mask returned array pointed to by ``addr`` is indexed by the macro +``ARM64_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2)``, allowing userspace +to know what fields can be changed for the system register described by +``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a +superset of the features supported by the system. + 5. The kvm_run structure ======================== -- Gitee From 272169ca694939e6da13a4048d79a40fcd197805 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 3 Oct 2023 23:04:06 +0000 Subject: [PATCH 09/11] KVM: arm64: Document vCPU feature selection UAPIs mainline inclusion from mainline-v6.7-rc1 commit dafa493dd01d5992f1cb70b08d1741c3ab99e04a category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/dafa493dd01d5992f1cb70b08d1741c3ab99e04a -------------------------------- commit dafa493dd01d5992f1cb70b08d1741c3ab99e04a upstream KVM/arm64 has a couple schemes for handling vCPU feature selection now, which is a lot to put on userspace. Add some documentation about how these interact and provide some recommendations for how to use the writable ID register scheme. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20231003230408.3405722-11-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/virt/kvm/api.rst | 4 ++ Documentation/virt/kvm/arm/index.rst | 1 + Documentation/virt/kvm/arm/vcpu-features.rst | 48 ++++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 Documentation/virt/kvm/arm/vcpu-features.rst diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index df30407bef62..08b66c2356a8 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3442,6 +3442,8 @@ return indicates the attribute is implemented. It does not necessarily indicate that the attribute can be read or written in the device's current state. "addr" is ignored. +.. _KVM_ARM_VCPU_INIT: + 4.82 KVM_ARM_VCPU_INIT ---------------------- @@ -6166,6 +6168,8 @@ writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG interface. No error will be returned, but the resulting offset will not be applied. +.. _KVM_ARM_GET_REG_WRITABLE_MASKS: + 4.139 KVM_ARM_GET_REG_WRITABLE_MASKS ------------------------------------------- diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst index e84848432158..7f231c724e16 100644 --- a/Documentation/virt/kvm/arm/index.rst +++ b/Documentation/virt/kvm/arm/index.rst @@ -11,3 +11,4 @@ ARM hypercalls pvtime ptp_kvm + vcpu-features diff --git a/Documentation/virt/kvm/arm/vcpu-features.rst b/Documentation/virt/kvm/arm/vcpu-features.rst new file mode 100644 index 000000000000..f7cc6d8d8b74 --- /dev/null +++ b/Documentation/virt/kvm/arm/vcpu-features.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +vCPU feature selection on arm64 +=============================== + +KVM/arm64 provides two mechanisms that allow userspace to configure +the CPU features presented to the guest. + +KVM_ARM_VCPU_INIT +================= + +The ``KVM_ARM_VCPU_INIT`` ioctl accepts a bitmap of feature flags +(``struct kvm_vcpu_init::features``). Features enabled by this interface are +*opt-in* and may change/extend UAPI. See :ref:`KVM_ARM_VCPU_INIT` for complete +documentation of the features controlled by the ioctl. + +Otherwise, all CPU features supported by KVM are described by the architected +ID registers. + +The ID Registers +================ + +The Arm architecture specifies a range of *ID Registers* that describe the set +of architectural features supported by the CPU implementation. KVM initializes +the guest's ID registers to the maximum set of CPU features supported by the +system. The ID register values may be VM-scoped in KVM, meaning that the +values could be shared for all vCPUs in a VM. + +KVM allows userspace to *opt-out* of certain CPU features described by the ID +registers by writing values to them via the ``KVM_SET_ONE_REG`` ioctl. The ID +registers are mutable until the VM has started, i.e. userspace has called +``KVM_RUN`` on at least one vCPU in the VM. Userspace can discover what fields +are mutable in the ID registers using the ``KVM_ARM_GET_REG_WRITABLE_MASKS``. +See the :ref:`ioctl documentation ` for more +details. + +Userspace is allowed to *limit* or *mask* CPU features according to the rules +outlined by the architecture in DDI0487J.a D19.1.3 'Principles of the ID +scheme for fields in ID register'. KVM does not allow ID register values that +exceed the capabilities of the system. + +.. warning:: + It is **strongly recommended** that userspace modify the ID register values + before accessing the rest of the vCPU's CPU register state. KVM may use the + ID register values to control feature emulation. Interleaving ID register + modification with other system register accesses may lead to unpredictable + behavior. -- Gitee From 12d7b2a3d3c58e0e075b608be6ca5a1b015ad337 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:50 -0700 Subject: [PATCH 10/11] KVM: Introduce KVM_SET_USER_MEMORY_REGION2 mainline inclusion from mainline-v6.8-rc1 commit bb58b90b1a8f753b582055adaf448214a8e22c31 category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/bb58b90b1a8f753b582055adaf448214a8e22c31 -------------------------------- commit bb58b90b1a8f753b582055adaf448214a8e22c31 upstream Introduce a "version 2" of KVM_SET_USER_MEMORY_REGION so that additional information can be supplied without setting userspace up to fail. The padding in the new kvm_userspace_memory_region2 structure will be used to pass a file descriptor in addition to the userspace_addr, i.e. allow userspace to point at a file descriptor and map memory into a guest that is NOT mapped into host userspace. Alternatively, KVM could simply add "struct kvm_userspace_memory_region2" without a new ioctl(), but as Paolo pointed out, adding a new ioctl() makes detection of bad flags a bit more robust, e.g. if the new fd field is guarded only by a flag and not a new ioctl(), then a userspace bug (setting a "bad" flag) would generate out-of-bounds access instead of an -EINVAL error. [Backport Changes] 1. In file include/uapi/linux/kvm.h updated the definition of non-upstream ioctl KVM_LOAD_USER_DATA with unused command number 0x43 instead of 0x49 in current source code, to avoid conflict with the upstream definition of KVM_SET_USER_MEMORY_REGION2. The command number 0x49 is allocated in upstream kernel and QEMU for KVM_SET_USER_MEMORY_REGION2 and was added as part sev-snp support, which is used extensively in latest QEMU releases. Therefore, changing this number causes deviation from the upstream which is risky, as it may introduce compatibility issues across KVM and QEMU interactions. So, this change ensures compatibility with upstream Kernel/QEMU interfaces and prevents potential issues in future backports. Cc: Jarkko Sakkinen Reviewed-by: Paolo Bonzini Reviewed-by: Xiaoyao Li Signed-off-by: Sean Christopherson Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Message-Id: <20231027182217.3615211-9-seanjc@google.com> Acked-by: Kai Huang Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/virt/kvm/api.rst | 22 +++++++++++++ arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 4 +-- include/uapi/linux/kvm.h | 15 ++++++++- virt/kvm/kvm_main.c | 57 +++++++++++++++++++++++++++++----- 5 files changed, 88 insertions(+), 12 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 08b66c2356a8..0d6254674bb5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6218,6 +6218,28 @@ to know what fields can be changed for the system register described by ``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a superset of the features supported by the system. +4.140 KVM_SET_USER_MEMORY_REGION2 +--------------------------------- + +:Capability: KVM_CAP_USER_MEMORY2 +:Architectures: all +:Type: vm ioctl +:Parameters: struct kvm_userspace_memory_region2 (in) +:Returns: 0 on success, -1 on error + +:: + + struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* start of the userspace allocated memory */ + __u64 pad[16]; + }; + +See KVM_SET_USER_MEMORY_REGION. + 5. The kvm_run structure ======================== diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 914153ae6e5f..ddc9dbcb7a8b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12650,7 +12650,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, } for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - struct kvm_userspace_memory_region m; + struct kvm_userspace_memory_region2 m; m.slot = id | (i << 16); m.flags = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 58dfb410bc7b..de0ec5918120 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1230,9 +1230,9 @@ enum kvm_mr_change { }; int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region2 *mem); int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f3abdeac3dd2..1544ac384781 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -95,6 +95,16 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 userspace_addr; + __u64 pad[16]; +}; + /* * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for * userspace, other bits are reserved for kvm internal use which are defined @@ -1219,6 +1229,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_RME 300 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 +#define KVM_CAP_USER_MEMORY2 231 #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 @@ -1545,8 +1556,10 @@ struct kvm_numa_info { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) -#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) +#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x43, struct kvm_user_data) #define KVM_CAP_ARM_TMM 300 /* FIXME: Large number to prevent conflicts */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 362779da35e5..6b195c721479 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1586,7 +1586,15 @@ static void kvm_replace_memslot(struct kvm *kvm, } } -static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) +/* + * Flags that do not access any of the extra space of struct + * kvm_userspace_memory_region2. KVM_SET_USER_MEMORY_REGION_V1_FLAGS + * only allows these. + */ +#define KVM_SET_USER_MEMORY_REGION_V1_FLAGS \ + (KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_READONLY) + +static int check_memory_region_flags(const struct kvm_userspace_memory_region2 *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; @@ -1988,7 +1996,7 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id, * Must be called holding kvm->slots_lock for write. */ int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region2 *mem) { struct kvm_memory_slot *old, *new; struct kvm_memslots *slots; @@ -2092,7 +2100,7 @@ int __kvm_set_memory_region(struct kvm *kvm, EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region2 *mem) { int r; @@ -2104,7 +2112,7 @@ int kvm_set_memory_region(struct kvm *kvm, EXPORT_SYMBOL_GPL(kvm_set_memory_region); static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region2 *mem) { if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; @@ -4592,6 +4600,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: + case KVM_CAP_USER_MEMORY2: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: case KVM_CAP_INTERNAL_ERROR_DATA: @@ -4847,6 +4856,14 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm) return fd; } +#define SANITY_CHECK_MEM_REGION_FIELD(field) \ +do { \ + BUILD_BUG_ON(offsetof(struct kvm_userspace_memory_region, field) != \ + offsetof(struct kvm_userspace_memory_region2, field)); \ + BUILD_BUG_ON(sizeof_field(struct kvm_userspace_memory_region, field) != \ + sizeof_field(struct kvm_userspace_memory_region2, field)); \ +} while (0) + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4869,15 +4886,39 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap); break; } + case KVM_SET_USER_MEMORY_REGION2: case KVM_SET_USER_MEMORY_REGION: { - struct kvm_userspace_memory_region kvm_userspace_mem; + struct kvm_userspace_memory_region2 mem; + unsigned long size; + + if (ioctl == KVM_SET_USER_MEMORY_REGION) { + /* + * Fields beyond struct kvm_userspace_memory_region shouldn't be + * accessed, but avoid leaking kernel memory in case of a bug. + */ + memset(&mem, 0, sizeof(mem)); + size = sizeof(struct kvm_userspace_memory_region); + } else { + size = sizeof(struct kvm_userspace_memory_region2); + } + + /* Ensure the common parts of the two structs are identical. */ + SANITY_CHECK_MEM_REGION_FIELD(slot); + SANITY_CHECK_MEM_REGION_FIELD(flags); + SANITY_CHECK_MEM_REGION_FIELD(guest_phys_addr); + SANITY_CHECK_MEM_REGION_FIELD(memory_size); + SANITY_CHECK_MEM_REGION_FIELD(userspace_addr); r = -EFAULT; - if (copy_from_user(&kvm_userspace_mem, argp, - sizeof(kvm_userspace_mem))) + if (copy_from_user(&mem, argp, size)) + goto out; + + r = -EINVAL; + if (ioctl == KVM_SET_USER_MEMORY_REGION && + (mem.flags & ~KVM_SET_USER_MEMORY_REGION_V1_FLAGS)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); + r = kvm_vm_ioctl_set_memory_region(kvm, &mem); break; } case KVM_GET_DIRTY_LOG: { -- Gitee From ebe9ac5d9577023fc6fe3fddd0a12e5603582f2c Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:21:51 -0700 Subject: [PATCH 11/11] KVM: Add KVM_EXIT_MEMORY_FAULT exit to report faults to userspace mainline inclusion from mainline-v6.8-rc1 commit 16f95f3b95caded251a0440051e44a2fbe9e5f55 category: Feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICQGHL CVE: NA Reference: https://github.com/torvalds/linux/commit/16f95f3b95caded251a0440051e44a2fbe9e5f55 -------------------------------- commit 16f95f3b95caded251a0440051e44a2fbe9e5f55 upstream Add a new KVM exit type to allow userspace to handle memory faults that KVM cannot resolve, but that userspace *may* be able to handle (without terminating the guest). KVM will initially use KVM_EXIT_MEMORY_FAULT to report implicit conversions between private and shared memory. With guest private memory, there will be two kind of memory conversions: - explicit conversion: happens when the guest explicitly calls into KVM to map a range (as private or shared) - implicit conversion: happens when the guest attempts to access a gfn that is configured in the "wrong" state (private vs. shared) On x86 (first architecture to support guest private memory), explicit conversions will be reported via KVM_EXIT_HYPERCALL+KVM_HC_MAP_GPA_RANGE, but reporting KVM_EXIT_HYPERCALL for implicit conversions is undesriable as there is (obviously) no hypercall, and there is no guarantee that the guest actually intends to convert between private and shared, i.e. what KVM thinks is an implicit conversion "request" could actually be the result of a guest code bug. KVM_EXIT_MEMORY_FAULT will be used to report memory faults that appear to be implicit conversions. Note! To allow for future possibilities where KVM reports KVM_EXIT_MEMORY_FAULT and fills run->memory_fault on _any_ unresolved fault, KVM returns "-EFAULT" (-1 with errno == EFAULT from userspace's perspective), not '0'! Due to historical baggage within KVM, exiting to userspace with '0' from deep callstacks, e.g. in emulation paths, is infeasible as doing so would require a near-complete overhaul of KVM, whereas KVM already propagates -errno return codes to userspace even when the -errno originated in a low level helper. Report the gpa+size instead of a single gfn even though the initial usage is expected to always report single pages. It's entirely possible, likely even, that KVM will someday support sub-page granularity faults, e.g. Intel's sub-page protection feature allows for additional protections at 128-byte granularity. Link: https://lore.kernel.org/all/20230908222905.1321305-5-amoorthy@google.com Link: https://lore.kernel.org/all/ZQ3AmLO2SYv3DszH@google.com [backport changes] Modified include/uapi/linux/kvm.h using KABI_EXTEND() to maintain KABI compatibility. Cc: Anish Moorthy Cc: David Matlack Suggested-by: Sean Christopherson Co-developed-by: Yu Zhang Signed-off-by: Yu Zhang Signed-off-by: Chao Peng Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Reviewed-by: Paolo Bonzini Message-Id: <20231027182217.3615211-10-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- Documentation/virt/kvm/api.rst | 41 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + include/linux/kvm_host.h | 11 +++++++++ include/uapi/linux/kvm.h | 8 +++++++ 4 files changed, 61 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0d6254674bb5..21489daa0157 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6896,6 +6896,26 @@ array field represents return values. The userspace should update the return values of SBI call before resuming the VCPU. For more details on RISC-V SBI spec refer, https://github.com/riscv/riscv-sbi-doc. +:: + + /* KVM_EXIT_MEMORY_FAULT */ + struct { + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; + +KVM_EXIT_MEMORY_FAULT indicates the vCPU has encountered a memory fault that +could not be resolved by KVM. The 'gpa' and 'size' (in bytes) describe the +guest physical address range [gpa, gpa + size) of the fault. The 'flags' field +describes properties of the faulting access that are likely pertinent. +Currently, no flags are defined. + +Note! KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it +accompanies a return code of '-1', not '0'! errno will always be set to EFAULT +or EHWPOISON when KVM exits with KVM_EXIT_MEMORY_FAULT, userspace should assume +kvm_run.exit_reason is stale/undefined for all other error numbers. + :: /* KVM_EXIT_NOTIFY */ @@ -7930,6 +7950,27 @@ This capability is aimed to mitigate the threat that malicious VMs can cause CPU stuck (due to event windows don't open up) and make the CPU unavailable to host or other VMs. +7.34 KVM_CAP_MEMORY_FAULT_INFO +------------------------------ + +:Architectures: x86 +:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP. + +The presence of this capability indicates that KVM_RUN will fill +kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if +there is a valid memslot but no backing VMA for the corresponding host virtual +address. + +The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns +an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set +to KVM_EXIT_MEMORY_FAULT. + +Note: Userspaces which attempt to resolve memory faults so that they can retry +KVM_RUN are encouraged to guard against repeatedly receiving the same +error/annotated fault. + +See KVM_EXIT_MEMORY_FAULT for more information. + 7.38 KVM_CAP_ARM_RME -------------------- diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ddc9dbcb7a8b..2791a62037c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4602,6 +4602,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: case KVM_CAP_IRQFD_RESAMPLE: + case KVM_CAP_MEMORY_FAULT_INFO: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index de0ec5918120..2e937a719dec 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2470,4 +2470,15 @@ static inline void kvm_account_pgtable_pages(void *virt, int nr) /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 +static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, + gpa_t gpa, gpa_t size) +{ + vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; + vcpu->run->memory_fault.gpa = gpa; + vcpu->run->memory_fault.size = size; + + /* Flags are not (yet) defined or communicated to userspace. */ + vcpu->run->memory_fault.flags = 0; +} + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1544ac384781..750f13888f9e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -275,6 +275,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -528,6 +529,12 @@ struct kvm_run { #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) __u32 flags; } notify; + /* KVM_EXIT_MEMORY_FAULT */ + KABI_EXTEND(struct { + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault;) /* Fix the size of the union. */ char padding[256]; }; @@ -1230,6 +1237,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_RME 300 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 -- Gitee