From f7bcc01a45044ada584065282afbbf120b35f124 Mon Sep 17 00:00:00 2001 From: yxk Date: Mon, 24 Feb 2025 01:19:17 -0500 Subject: [PATCH 01/52] Revert "pmu: enable pmu phys irq inject for cvm" virtcca inclusion category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- This reverts commit 1182be89002c268a781bd90b25f18da76cfea732. Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 25 ------------------------- arch/arm64/kvm/pmu-emul.c | 9 --------- drivers/perf/arm_pmu.c | 17 ----------------- include/linux/perf/arm_pmu.h | 2 +- 4 files changed, 1 insertion(+), 52 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 5bd31fc0e4469..e390ef68e9a9b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -41,8 +41,6 @@ #include #include #include -#include -#include #include #include @@ -1148,18 +1146,6 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) xfer_to_guest_mode_work_pending(); } -#ifdef CONFIG_HISI_VIRTCCA_HOST -static inline void update_pmu_phys_irq(struct kvm_vcpu *vcpu, bool *pmu_stopped) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - - if (pmu->irq_level) { - *pmu_stopped = true; - arm_pmu_set_phys_irq(false); - } -} -#endif - /* * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while * the vCPU is running. @@ -1212,9 +1198,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; run->flags = 0; while (ret > 0) { -#ifdef CONFIG_HISI_VIRTCCA_HOST - bool pmu_stopped = false; -#endif /* * Check conditions before entering the guest */ @@ -1242,10 +1225,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid); kvm_pmu_flush_hwstate(vcpu); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - update_pmu_phys_irq(vcpu, &pmu_stopped); -#endif local_irq_disable(); @@ -1356,10 +1335,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) #endif preempt_enable(); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (pmu_stopped) - arm_pmu_set_phys_irq(true); -#endif /* * The ARMv8 architecture doesn't give the hypervisor * a mechanism to prevent a guest from dropping to AArch32 EL0 diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 6469a7c51df3f..20e7b97376cb7 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -15,7 +15,6 @@ #include #include #include -#include #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) @@ -341,14 +340,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - reg = run->tec_exit.pmu_ovf_status; - return reg; - } -#endif if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 57cd2d1a9a18e..227cf80895bea 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -770,23 +770,6 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -#ifdef CONFIG_HISI_VIRTCCA_HOST -void arm_pmu_set_phys_irq(bool enable) -{ - int cpu = get_cpu(); - struct arm_pmu *pmu = per_cpu(cpu_armpmu, cpu); - int irq; - - irq = armpmu_get_cpu_irq(pmu, cpu); - if (irq && !enable) - per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); - else if (irq && enable) - per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); - - put_cpu(); -} 
-#endif - #ifdef CONFIG_CPU_PM static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) { diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ac84689cc11cf..0bbb5094b87b6 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -221,7 +221,7 @@ void armpmu_free_irq(int irq, int cpu); #endif /* CONFIG_ARM_PMU */ -#if defined(CONFIG_ARM_PMU) && defined(CONFIG_HISI_VIRTCCA_HOST) +#if defined(CONFIG_ARM_PMU) void arm_pmu_set_phys_irq(bool enable); #else #define arm_pmu_set_phys_irq(x) do {} while (0) -- Gitee From da2ad8a95b550da42ead653cc29badd4f0dda99b Mon Sep 17 00:00:00 2001 From: Yiwei Zhuang Date: Thu, 3 Apr 2025 16:22:42 +0800 Subject: [PATCH 02/52] Revert "VirtCCA: cvm support UEFI boot" virtcca inclusion category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- This partially reverts commit 67773feaf933058b9e64a9089345b3c64f823acd. Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- include/uapi/linux/kvm.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 938613c255537..47e88bcfb135a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1533,15 +1533,9 @@ struct kvm_numa_info { struct kvm_user_data { __u64 loader_start; - /* - * When the lowest bit of dtb_info is 0, the value of dtb_info represents the size of the - * DTB, and data_start and data_size represent the address base and size of the MMIO. - * When the lowest bit of dtb_info is 1, data_start and data_size represent the address base - * and size of the DTB. - */ - __u64 dtb_info; - __u64 data_start; - __u64 data_size; + __u64 image_end; + __u64 initrd_start; + __u64 dtb_end; __u64 ram_size; struct kvm_numa_info numa_info; }; -- Gitee From c08313e51b50160ce8f489ed33e760df5f6d9a1d Mon Sep 17 00:00:00 2001 From: yxk Date: Thu, 27 Mar 2025 04:30:47 -0400 Subject: [PATCH 03/52] Revert "kvm: add virtcca cvm host feature" virtcca inclusion category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- This partially reverts commit 67e11ee6347c43a97e8987b03a5b3534cd8095d9. 
Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- arch/arm64/configs/openeuler_defconfig | 3 +- arch/arm64/include/asm/kvm_emulate.h | 18 -- arch/arm64/include/uapi/asm/kvm.h | 4 - arch/arm64/kvm/Makefile | 3 - arch/arm64/kvm/arm.c | 108 +----------- arch/arm64/kvm/guest.c | 5 - arch/arm64/kvm/mmio.c | 14 +- arch/arm64/kvm/mmu.c | 10 -- arch/arm64/kvm/psci.c | 10 +- arch/arm64/kvm/reset.c | 11 -- arch/arm64/kvm/vgic/vgic-v3.c | 14 +- arch/arm64/kvm/vgic/vgic.c | 52 +----- arch/arm64/kvm/virtcca_cvm_exit.c | 221 ------------------------- include/linux/kvm_host.h | 22 --- include/uapi/linux/kvm.h | 13 -- 15 files changed, 15 insertions(+), 493 deletions(-) delete mode 100644 arch/arm64/kvm/virtcca_cvm_exit.c diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 7481b12939e62..8705b478ab7db 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -769,8 +769,7 @@ CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y CONFIG_KVM_HISI_VIRT=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=y -CONFIG_HISI_VIRTCCA_HOST=y -CONFIG_HISI_VIRTCCA_CODA=y +CONFIG_CVM_HOST=y # CONFIG_NVHE_EL2_DEBUG is not set CONFIG_KVM_ARM_MULTI_LPI_TRANSLATE_CACHE=y CONFIG_ARCH_VCPU_STAT=y diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index f0b10cb2c87dd..3aa1c7f56f655 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -642,22 +642,4 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) kvm_write_cptr_el2(val); } - -#ifdef CONFIG_HISI_VIRTCCA_HOST -static inline bool kvm_is_virtcca_cvm(struct kvm *kvm) -{ - if (static_branch_unlikely(&virtcca_cvm_is_available)) - return kvm->arch.is_virtcca_cvm; - return false; -} - -static inline enum virtcca_cvm_state virtcca_cvm_state(struct kvm *kvm) -{ - struct virtcca_cvm *virtcca_cvm = kvm->arch.virtcca_cvm; - - if (!virtcca_cvm) - return 0; - return READ_ONCE(virtcca_cvm->state); -} -#endif #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 97941e582d83d..f7ddd73a8c0fa 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -110,7 +110,6 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ -#define KVM_ARM_VCPU_TEC 8 /* VCPU TEC state as part of cvm */ struct kvm_vcpu_init { __u32 target; @@ -416,9 +415,6 @@ enum { #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 -#define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256 0 -#define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512 1 - /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index eadf41417ffa6..952eee572e234 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,9 +24,6 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o -kvm-$(CONFIG_HISI_VIRTCCA_HOST) += tmi.o -kvm-$(CONFIG_HISI_VIRTCCA_HOST) += virtcca_cvm.o -kvm-$(CONFIG_HISI_VIRTCCA_HOST) += virtcca_cvm_exit.o obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/ always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arm.c 
b/arch/arm64/kvm/arm.c index e390ef68e9a9b..c7df79fb40823 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include @@ -243,13 +242,6 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->slots_lock); break; -#ifdef CONFIG_HISI_VIRTCCA_HOST - case KVM_CAP_ARM_TMM: - r = 0; - if (static_branch_unlikely(&virtcca_cvm_is_available)) - r = kvm_cvm_enable_cap(kvm, cap); - break; -#endif #ifdef CONFIG_ARM64_HDBSS case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: r = kvm_cap_arm_enable_hdbss(kvm, cap); @@ -276,14 +268,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (kvm_arm_cvm_type(type)) { - ret = kvm_enable_virtcca_cvm(kvm); - if (ret) - return ret; - } -#endif - ret = kvm_sched_affinity_vm_init(kvm); if (ret) return ret; @@ -330,20 +314,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (kvm_arm_cvm_type(type)) { - ret = kvm_init_cvm_vm(kvm); - if (ret) - goto out_free_stage2_pgd; - } -#endif - return 0; -#ifdef CONFIG_HISI_VIRTCCA_HOST -out_free_stage2_pgd: - kvm_free_stage2_pgd(&kvm->arch.mmu); -#endif err_free_cpumask: free_cpumask_var(kvm->arch.supported_cpus); err_unshare_kvm: @@ -378,10 +350,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_unshare_hyp(kvm, kvm + 1); kvm_arm_teardown_hypercalls(kvm); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (kvm_is_virtcca_cvm(kvm)) - kvm_destroy_cvm(kvm); -#endif } extern struct static_key_false ipiv_enable; @@ -455,12 +423,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = system_supports_mte(); break; case KVM_CAP_STEAL_TIME: -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (kvm && kvm_is_virtcca_cvm(kvm)) - r = 0; - else -#endif - r = kvm_arm_pvtime_supported(); + r = kvm_arm_pvtime_supported(); break; case KVM_CAP_ARM_EL1_32BIT: r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); @@ -501,15 +464,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = sdev_enable; break; #endif -#ifdef CONFIG_HISI_VIRTCCA_HOST - case KVM_CAP_ARM_TMM: - if (!is_armv8_4_sel2_present()) { - r = -ENXIO; - break; - } - r = static_key_enabled(&virtcca_cvm_is_available); - break; -#endif #ifdef CONFIG_ARM64_HDBSS case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: r = system_supports_hdbss(); @@ -664,23 +618,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - if (single_task_running()) - vcpu_clear_wfx_traps(vcpu); - else - vcpu_set_wfx_traps(vcpu); - } -#endif kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) - kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - return; - } -#endif if (has_vhe()) kvm_vcpu_load_sysregs_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); @@ -711,12 +650,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - kvm_cvm_vcpu_put(vcpu); - return; - } -#endif kvm_arch_vcpu_put_debug_state_flags(vcpu); kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) @@ -1263,12 +1196,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_entry(*vcpu_pc(vcpu)); guest_timing_enter_irqoff(); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - ret = kvm_tec_enter(vcpu); - else -#endif - ret = 
kvm_arm_vcpu_enter_exit(vcpu); + ret = kvm_arm_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -1322,17 +1250,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_enable(); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (!vcpu_is_tec(vcpu)) { -#endif - trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); - /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, ret); + /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, ret); -#ifdef CONFIG_HISI_VIRTCCA_HOST - } -#endif preempt_enable(); /* @@ -1354,12 +1276,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = ARM_EXCEPTION_IL; } -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - ret = handle_cvm_exit(vcpu, ret); - else -#endif - ret = handle_exit(vcpu, ret); + ret = handle_exit(vcpu, ret); #ifdef CONFIG_ARCH_VCPU_STAT update_vcpu_stat_time(&vcpu->stat); #endif @@ -1884,11 +1801,6 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) struct kvm_device_attr attr; switch (ioctl) { -#ifdef CONFIG_HISI_VIRTCCA_HOST - case KVM_LOAD_USER_DATA: { - return kvm_load_user_data(kvm, arg); - } -#endif case KVM_CREATE_IRQCHIP: { int ret; if (!vgic_present) @@ -2780,14 +2692,6 @@ static __init int kvm_arm_init(void) in_hyp_mode = is_kernel_in_hyp_mode(); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (is_virtcca_cvm_enable() && in_hyp_mode) { - err = kvm_init_tmm(); - if (err) - return err; - } -#endif - if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 105f4e00ec8b3..d3161a683838d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "trace.h" @@ -876,10 +875,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - return kvm_cvm_vcpu_set_events(vcpu, serror_pending, ext_dabt_pending); -#endif if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) return -EINVAL; diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 9abea8e352408..2aa503ff742ee 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -6,7 +6,6 @@ #include #include -#include #include #include "trace.h" @@ -138,12 +137,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)-> - tec_entry.gprs[0] = data; - } -#endif } /* @@ -213,12 +206,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) run->mmio.phys_addr = fault_ipa; run->mmio.len = len; vcpu->mmio_needed = 1; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_entry.flags |= - TEC_ENTRY_FLAG_EMUL_MMIO; - } -#endif + if (!ret) { /* We handled the access successfully in the kernel. 
*/ if (!is_write) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3830aa0b07a0b..429ba7ab0721c 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -873,11 +873,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t u64 mmfr0, mmfr1; u32 phys_shift; -#ifdef CONFIG_HISI_VIRTCCA_CODA - if ((type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) && (!kvm_is_virtcca_cvm(kvm))) -#else if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) -#endif return -EINVAL; phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); @@ -1420,12 +1416,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); -#ifdef CONFIG_HISI_VIRTCCA_CODA - if (vcpu_is_tec(vcpu)) { - write_fault = true; - prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; - } -#endif exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); vcpu->stat.mabt_exit_stat++; diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index b544418b68edf..1f69b667332b2 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -80,10 +79,6 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_INVALID_PARAMS; spin_lock(&vcpu->arch.mp_state_lock); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - cvm_psci_complete(source_vcpu, vcpu); -#endif if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) ret = PSCI_RET_ALREADY_ON; @@ -146,10 +141,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) /* Ignore other bits of target affinity */ target_affinity &= target_affinity_mask; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - return cvm_psci_vcpu_affinity_info(vcpu, target_affinity, lowest_affinity_level); -#endif + /* * If one or more VCPU matching target affinity are running * then ON else OFF diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 0de1094d4e197..a1830e6393852 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -29,7 +29,6 @@ #include #include #include -#include /* Maximum phys_shift supported for any VM on this host */ static u32 __ro_after_init kvm_ipa_limit; @@ -140,12 +139,6 @@ int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature) return -EPERM; return kvm_vcpu_finalize_sve(vcpu); -#ifdef CONFIG_HISI_VIRTCCA_HOST - case KVM_ARM_VCPU_TEC: - if (!kvm_is_virtcca_cvm(vcpu->kvm)) - return -EINVAL; - return kvm_finalize_vcpu_tec(vcpu); -#endif } return -EINVAL; @@ -170,10 +163,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); kfree(sve_state); kfree(vcpu->arch.ccsidr); -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - kvm_destroy_tec(vcpu); -#endif #ifdef CONFIG_ARM64_HDBSS if (vcpu->arch.hdbss.br_el2) { diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index dab599e857b51..69ca111e349d9 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "vgic.h" @@ -681,10 +680,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) (unsigned long long)info->vcpu.start); } else if (kvm_get_mode() != KVM_MODE_PROTECTED) { kvm_vgic_global_state.vcpu_base = info->vcpu.start; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (!static_branch_unlikely(&virtcca_cvm_is_available)) -#endif - 
kvm_vgic_global_state.can_emulate_gicv2 = true; + kvm_vgic_global_state.can_emulate_gicv2 = true; ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (ret) { kvm_err("Cannot register GICv2 KVM device.\n"); @@ -764,13 +760,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) { - cpu_if->vgic_vmcr = - ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_exit.gicv3_vmcr; - return; - } -#endif + if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index ec110006acf54..2459b0adea086 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -11,7 +11,6 @@ #include #include -#include #include "vgic.h" @@ -898,44 +897,12 @@ static inline bool can_access_vgic_from_kernel(void) return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); } -#ifdef CONFIG_HISI_VIRTCCA_HOST -static inline void vgic_tmm_save_state(struct kvm_vcpu *vcpu) -{ - int i; - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - struct tmi_tec_run *tec_run = vcpu->arch.tec.tec_run; - - for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { - cpu_if->vgic_lr[i] = tec_run->tec_exit.gicv3_lrs[i]; - tec_run->tec_entry.gicv3_lrs[i] = 0; - } -} - -static inline void vgic_tmm_restore_state(struct kvm_vcpu *vcpu) -{ - int i; - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - struct tmi_tec_run *tec_run = vcpu->arch.tec.tec_run; - - for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { - tec_run->tec_entry.gicv3_lrs[i] = cpu_if->vgic_lr[i]; - tec_run->tec_exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; - } -} -#endif - static inline void vgic_save_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); else -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - vgic_tmm_save_state(vcpu); - else -#endif - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); - + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); } /* Sync back the hardware VGIC state into our emulation after a guest's run. */ @@ -965,12 +932,7 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); else -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - vgic_tmm_restore_state(vcpu); - else -#endif - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } /* Flush our emulation state into the GIC hardware before entering the guest. 
*/ @@ -1011,10 +973,7 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - return; -#endif + if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_load(vcpu); else @@ -1025,10 +984,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (vcpu_is_tec(vcpu)) - return; -#endif + if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_put(vcpu); else diff --git a/arch/arm64/kvm/virtcca_cvm_exit.c b/arch/arm64/kvm/virtcca_cvm_exit.c deleted file mode 100644 index 9654375a9c8cb..0000000000000 --- a/arch/arm64/kvm/virtcca_cvm_exit.c +++ /dev/null @@ -1,221 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2024, The Linux Foundation. All rights reserved. - */ -#include -#include -#include - -#include -#include -#include - -typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); - -static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu, bool unmask_ctl) -{ - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = run->tec_exit.cntv_ctl; - __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = run->tec_exit.cntv_cval; - __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = run->tec_exit.cntp_ctl; - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = run->tec_exit.cntp_cval; - - /* Because the timer mask is tainted by TMM, we don't know the - * true intent of the guest. Here, we assume mask is always - * cleared during WFI. - */ - if (unmask_ctl) { - __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK; - __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK; - } - - kvm_cvm_timers_update(vcpu); -} - -static int tec_exit_reason_notimpl(struct kvm_vcpu *vcpu) -{ - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - pr_err("[vcpu %d] Unhandled exit reason from cvm (ESR: %#llx)\n", - vcpu->vcpu_id, run->tec_exit.esr); - return -ENXIO; -} - -/* The process is the same as kvm_handle_wfx, - * except the tracing and updating operation for pc, - * we copy kvm_handle_wfx process here - * to avoid changing kvm_handle_wfx function. 
- */ -static int tec_exit_wfx(struct kvm_vcpu *vcpu) -{ - u64 esr = kvm_vcpu_get_esr(vcpu); - - if (esr & ESR_ELx_WFx_ISS_WFE) { - vcpu->stat.wfe_exit_stat++; - } else { - vcpu->stat.wfi_exit_stat++; - } - - if (esr & ESR_ELx_WFx_ISS_WFxT) { - if (esr & ESR_ELx_WFx_ISS_RV) { - u64 val, now; - - now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); - val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); - - if (now >= val) - goto out; - } else { - /* Treat WFxT as WFx if RN is invalid */ - esr &= ~ESR_ELx_WFx_ISS_WFxT; - } - } - - if (esr & ESR_ELx_WFx_ISS_WFE) { - kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); - } else { - if (esr & ESR_ELx_WFx_ISS_WFxT) - vcpu_set_flag(vcpu, IN_WFIT); - - kvm_vcpu_wfi(vcpu); - } - -out: - return 1; -} - -static int tec_exit_sys_reg(struct kvm_vcpu *vcpu) -{ - int ret; - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - unsigned long esr = kvm_vcpu_get_esr(vcpu); - int rt = kvm_vcpu_sys_get_rt(vcpu); - bool is_write = !(esr & 1); - - if (is_write) - vcpu_set_reg(vcpu, rt, run->tec_exit.gprs[0]); - - ret = kvm_handle_sys_reg(vcpu); - - if (ret >= 0 && !is_write) - run->tec_entry.gprs[0] = vcpu_get_reg(vcpu, rt); - - return ret; -} - -static int tec_exit_sync_dabt(struct kvm_vcpu *vcpu) -{ - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) { - vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), - run->tec_exit.gprs[0]); - } - return kvm_handle_guest_abort(vcpu); -} - -static int tec_exit_sync_iabt(struct kvm_vcpu *vcpu) -{ - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", - vcpu->vcpu_id, run->tec_exit.esr); - - return -ENXIO; -} - -static exit_handler_fn tec_exit_handlers[] = { - [0 ... 
ESR_ELx_EC_MAX] = tec_exit_reason_notimpl, - [ESR_ELx_EC_WFx] = tec_exit_wfx, - [ESR_ELx_EC_SYS64] = tec_exit_sys_reg, - [ESR_ELx_EC_DABT_LOW] = tec_exit_sync_dabt, - [ESR_ELx_EC_IABT_LOW] = tec_exit_sync_iabt -}; - -static int tec_exit_psci(struct kvm_vcpu *vcpu) -{ - int i; - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) - vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); - - return kvm_psci_call(vcpu); -} - -static int tec_exit_host_call(struct kvm_vcpu *vcpu) -{ - int ret, i; - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - - vcpu->stat.hvc_exit_stat++; - - for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) - vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); - - ret = kvm_smccc_call_handler(vcpu); - - if (ret < 0) { - vcpu_set_reg(vcpu, 0, ~0UL); - ret = 1; - } - for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) - run->tec_entry.gprs[i] = vcpu_get_reg(vcpu, i); - - return ret; -} - -/* - * Return > 0 to return to guest, < 0 on error, 0(and set exit_reason) on - * proper exit to userspace - */ - -int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) -{ - unsigned long status; - struct tmi_tec_run *run = vcpu->arch.tec.tec_run; - u8 esr_ec = ESR_ELx_EC(run->tec_exit.esr); - bool is_wfx; - - status = TMI_RETURN_STATUS(tec_run_ret); - - if (status == TMI_ERROR_CVM_POWEROFF) { - vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; - vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SHUTDOWN; - return 0; - } - - if (status == TMI_ERROR_CVM_STATE) { - vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; - return 0; - } - - if (tec_run_ret) - return -ENXIO; - - vcpu->arch.fault.esr_el2 = run->tec_exit.esr; - vcpu->arch.fault.far_el2 = run->tec_exit.far; - vcpu->arch.fault.hpfar_el2 = run->tec_exit.hpfar; - - is_wfx = (run->tec_exit.exit_reason == TMI_EXIT_SYNC) && (esr_ec == ESR_ELx_EC_WFx); - update_arch_timer_irq_lines(vcpu, is_wfx); - - run->tec_entry.flags = 0; - - switch (run->tec_exit.exit_reason) { - case TMI_EXIT_FIQ: - case TMI_EXIT_IRQ: - return 1; - case TMI_EXIT_PSCI: - return tec_exit_psci(vcpu); - case TMI_EXIT_SYNC: - return tec_exit_handlers[esr_ec](vcpu); - case TMI_EXIT_HOST_CALL: - return tec_exit_host_call(vcpu); - } - - kvm_pr_unimpl("Unsupported exit reason : 0x%llx\n", - run->tec_exit.exit_reason); - return 0; -} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3968797272b94..1dd58672789fb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -544,28 +544,6 @@ static __always_inline void guest_state_exit_irqoff(void) instrumentation_end(); } -#ifdef CONFIG_HISI_VIRTCCA_HOST - -#define KVM_TYPE_CVM_BIT 8 -#define CVM_MAX_HALT_POLL_NS 100000 - -DECLARE_STATIC_KEY_FALSE(virtcca_cvm_is_available); - -static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&virtcca_cvm_is_available)) - return vcpu->arch.tec.tec_run; - - return false; -} - -static inline bool kvm_arm_cvm_type(unsigned long type) -{ - return type & (1UL << KVM_TYPE_CVM_BIT); -} - -#endif - static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 47e88bcfb135a..7e9fa9b5ce78c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1527,19 +1527,6 @@ struct kvm_numa_info { #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) -#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) - -#define KVM_CAP_ARM_TMM 300 /* FIXME: Large number to prevent conflicts */ - 
-struct kvm_user_data { - __u64 loader_start; - __u64 image_end; - __u64 initrd_start; - __u64 dtb_end; - __u64 ram_size; - struct kvm_numa_info numa_info; -}; - /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { __u64 user_addr; -- Gitee From d959753851f0057989e93d68758b200c65cc4bf4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:21:54 -0700 Subject: [PATCH 04/52] KVM: Prepare for handling only shared mappings in mmu_notifier events community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Add flags to "struct kvm_gfn_range" to let notifier events target only shared and only private mappings, and wire up the existing mmu_notifier events to be shared-only (private memory is never associated with a userspace virtual address, i.e. can't be reached via mmu_notifiers). Add two flags so that KVM can handle the three possibilities (shared, private, and shared+private) without needing something like a tri-state enum. Link: https://lore.kernel.org/all/ZJX0hk+KpQP0KUyB@google.com Signed-off-by: Sean Christopherson Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1dd58672789fb..a7790325665b2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -264,6 +264,8 @@ struct kvm_gfn_range { gfn_t start; gfn_t end; union kvm_mmu_notifier_arg arg; + bool only_private; + bool only_shared; bool may_block; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b7378445812f..29b7757b4600e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -631,6 +631,13 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, * the second or later invocation of the handler). */ gfn_range.arg = range->arg; + + /* + * HVA-based notifications aren't relevant to private + * mappings as they don't have a userspace mapping. + */ + gfn_range.only_private = false; + gfn_range.only_shared = true; gfn_range.may_block = range->may_block; /* -- Gitee From ebfcbd13e4271c3c5ead22cec9d9b52c545e850f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 13 Dec 2023 11:25:39 +0000 Subject: [PATCH 05/52] kvm: arm64: Include kvm_emulate.h in kvm/arm_psci.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Fix a potential build error (like below, when asm/kvm_emulate.h gets included after the kvm/arm_psci.h) by including the missing header file in kvm/arm_psci.h: ./include/kvm/arm_psci.h: In function ‘kvm_psci_version’: ./include/kvm/arm_psci.h:29:13: error: implicit declaration of function ‘vcpu_has_feature’; did you mean ‘cpu_have_feature’?
[-Werror=implicit-function-declaration] 29 | if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) { | ^~~~~~~~~~~~~~~~ | cpu_have_feature Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- include/kvm/arm_psci.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 6e55b9283789b..bbeb68f031be4 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -10,6 +10,8 @@ #include #include +#include + #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) #define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) -- Gitee From 3e73d4d6bd1c9b30cdc8e824dc8cef9906e8194b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 27 Sep 2021 12:02:56 +0100 Subject: [PATCH 06/52] arm64: RME: Handle Granule Protection Faults (GPFs) community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- If the host attempts to access granules that have been delegated for use in a realm these accesses will be caught and will trigger a Granule Protection Fault (GPF). A fault during a page walk signals a bug in the kernel and is handled by oopsing the kernel. A non-page walk fault could be caused by user space having access to a page which has been delegated to the kernel and will trigger a SIGBUS to allow debugging why user space is trying to access a delegated page. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/mm/fault.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4ea07caba71cf..647bbd9bad5b7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -898,6 +898,25 @@ static int do_tag_check_fault(unsigned long far, unsigned long esr, return 0; } +static int do_gpf_ptw(unsigned long far, unsigned long esr, struct pt_regs *regs) +{ + const struct fault_info *inf = esr_to_fault_info(esr); + + die_kernel_fault(inf->name, far, esr, regs); + return 0; +} + +static int do_gpf(unsigned long far, unsigned long esr, struct pt_regs *regs) +{ + const struct fault_info *inf = esr_to_fault_info(esr); + + if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) + return 0; + + arm64_notify_die(inf->name, regs, inf->sig, inf->code, far, esr); + return 0; +} + static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" }, { do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" }, @@ -934,12 +953,12 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 32" }, { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 34" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 35" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 36" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 37" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 38" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 39" }, - { do_bad, SIGKILL, SI_KERNEL, "unknown 40" }, + { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level -1" }, + { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 0" }, + { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 1" }, + { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 2" }, + { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at 
level 3" }, + { do_gpf, SIGBUS, SI_KERNEL, "Granule Protection Fault not on table walk" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 41" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 42" }, { do_bad, SIGKILL, SI_KERNEL, "unknown 43" }, -- Gitee From 8051d820572fc55bf15718ef146b40b6532c871e Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 27 Sep 2021 14:50:37 +0100 Subject: [PATCH 07/52] arm64: RME: Add SMC definitions for calling the RMM community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM (Realm Management Monitor) provides functionality that can be accessed by SMC calls from the host. The SMC definitions are based on DEN0137[1] version 1.0-rel0 [1] https://developer.arm.com/documentation/den0137/1-0rel0/ Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/rmi_smc.h | 259 +++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 arch/arm64/include/asm/rmi_smc.h diff --git a/arch/arm64/include/asm/rmi_smc.h b/arch/arm64/include/asm/rmi_smc.h new file mode 100644 index 0000000000000..eec659e284cd4 --- /dev/null +++ b/arch/arm64/include/asm/rmi_smc.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024 ARM Ltd. + * + * The values and structures in this file are from the Realm Management Monitor + * specification (DEN0137) version 1.0-rel0: + * https://developer.arm.com/documentation/den0137/1-0rel0/ + */ + +#ifndef __ASM_RME_SMC_H +#define __ASM_RME_SMC_H + +#include + +#define SMC_RMI_CALL(func) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD, \ + (func)) + +#define SMC_RMI_VERSION SMC_RMI_CALL(0x0150) +#define SMC_RMI_GRANULE_DELEGATE SMC_RMI_CALL(0x0151) +#define SMC_RMI_GRANULE_UNDELEGATE SMC_RMI_CALL(0x0152) +#define SMC_RMI_DATA_CREATE SMC_RMI_CALL(0x0153) +#define SMC_RMI_DATA_CREATE_UNKNOWN SMC_RMI_CALL(0x0154) +#define SMC_RMI_DATA_DESTROY SMC_RMI_CALL(0x0155) + +#define SMC_RMI_REALM_ACTIVATE SMC_RMI_CALL(0x0157) +#define SMC_RMI_REALM_CREATE SMC_RMI_CALL(0x0158) +#define SMC_RMI_REALM_DESTROY SMC_RMI_CALL(0x0159) +#define SMC_RMI_REC_CREATE SMC_RMI_CALL(0x015a) +#define SMC_RMI_REC_DESTROY SMC_RMI_CALL(0x015b) +#define SMC_RMI_REC_ENTER SMC_RMI_CALL(0x015c) +#define SMC_RMI_RTT_CREATE SMC_RMI_CALL(0x015d) +#define SMC_RMI_RTT_DESTROY SMC_RMI_CALL(0x015e) +#define SMC_RMI_RTT_MAP_UNPROTECTED SMC_RMI_CALL(0x015f) + +#define SMC_RMI_RTT_READ_ENTRY SMC_RMI_CALL(0x0161) +#define SMC_RMI_RTT_UNMAP_UNPROTECTED SMC_RMI_CALL(0x0162) + +#define SMC_RMI_PSCI_COMPLETE SMC_RMI_CALL(0x0164) +#define SMC_RMI_FEATURES SMC_RMI_CALL(0x0165) +#define SMC_RMI_RTT_FOLD SMC_RMI_CALL(0x0166) +#define SMC_RMI_REC_AUX_COUNT SMC_RMI_CALL(0x0167) +#define SMC_RMI_RTT_INIT_RIPAS SMC_RMI_CALL(0x0168) +#define SMC_RMI_RTT_SET_RIPAS SMC_RMI_CALL(0x0169) + +#define RMI_ABI_MAJOR_VERSION 1 +#define RMI_ABI_MINOR_VERSION 0 + +#define RMI_ABI_VERSION_GET_MAJOR(version) ((version) >> 16) +#define RMI_ABI_VERSION_GET_MINOR(version) ((version) & 0xFFFF) +#define RMI_ABI_VERSION(major, minor) (((major) << 16) | (minor)) + +#define RMI_UNASSIGNED 0 +#define RMI_ASSIGNED 1 +#define RMI_TABLE 2 + +#define RMI_RETURN_STATUS(ret) ((ret) & 0xFF) +#define RMI_RETURN_INDEX(ret) (((ret) >> 8) & 0xFF) + +#define RMI_SUCCESS 0 +#define RMI_ERROR_INPUT 1 +#define RMI_ERROR_REALM 2 +#define 
RMI_ERROR_REC 3 +#define RMI_ERROR_RTT 4 + +enum rmi_ripas { + RMI_EMPTY = 0, + RMI_RAM = 1, + RMI_DESTROYED = 2, +}; + +#define RMI_NO_MEASURE_CONTENT 0 +#define RMI_MEASURE_CONTENT 1 + +#define RMI_FEATURE_REGISTER_0_S2SZ GENMASK(7, 0) +#define RMI_FEATURE_REGISTER_0_LPA2 BIT(8) +#define RMI_FEATURE_REGISTER_0_SVE_EN BIT(9) +#define RMI_FEATURE_REGISTER_0_SVE_VL GENMASK(13, 10) +#define RMI_FEATURE_REGISTER_0_NUM_BPS GENMASK(19, 14) +#define RMI_FEATURE_REGISTER_0_NUM_WPS GENMASK(25, 20) +#define RMI_FEATURE_REGISTER_0_PMU_EN BIT(26) +#define RMI_FEATURE_REGISTER_0_PMU_NUM_CTRS GENMASK(31, 27) +#define RMI_FEATURE_REGISTER_0_HASH_SHA_256 BIT(32) +#define RMI_FEATURE_REGISTER_0_HASH_SHA_512 BIT(33) +#define RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS GENMASK(37, 34) +#define RMI_FEATURE_REGISTER_0_MAX_RECS_ORDER GENMASK(41, 38) +#define RMI_FEATURE_REGISTER_0_Reserved GENMASK(63, 42) + +#define RMI_REALM_PARAM_FLAG_LPA2 BIT(0) +#define RMI_REALM_PARAM_FLAG_SVE BIT(1) +#define RMI_REALM_PARAM_FLAG_PMU BIT(2) + +/* + * Note many of these fields are smaller than u64 but all fields have u64 + * alignment, so use u64 to ensure correct alignment. + */ +struct realm_params { + union { /* 0x0 */ + struct { + u64 flags; + u64 s2sz; + u64 sve_vl; + u64 num_bps; + u64 num_wps; + u64 pmu_num_ctrs; + u64 hash_algo; + }; + u8 padding0[0x400]; + }; + union { /* 0x400 */ + u8 rpv[64]; + u8 padding1[0x400]; + }; + union { /* 0x800 */ + struct { + u64 vmid; + u64 rtt_base; + s64 rtt_level_start; + u64 rtt_num_start; + }; + u8 padding2[0x800]; + }; +}; + +/* + * The number of GPRs (starting from X0) that are + * configured by the host when a REC is created. + */ +#define REC_CREATE_NR_GPRS 8 + +#define REC_PARAMS_FLAG_RUNNABLE BIT_ULL(0) + +#define REC_PARAMS_AUX_GRANULES 16 + +struct rec_params { + union { /* 0x0 */ + u64 flags; + u8 padding0[0x100]; + }; + union { /* 0x100 */ + u64 mpidr; + u8 padding1[0x100]; + }; + union { /* 0x200 */ + u64 pc; + u8 padding2[0x100]; + }; + union { /* 0x300 */ + u64 gprs[REC_CREATE_NR_GPRS]; + u8 padding3[0x500]; + }; + union { /* 0x800 */ + struct { + u64 num_rec_aux; + u64 aux[REC_PARAMS_AUX_GRANULES]; + }; + u8 padding4[0x800]; + }; +}; + +#define REC_ENTER_EMULATED_MMIO BIT(0) +#define REC_ENTER_INJECT_SEA BIT(1) +#define REC_ENTER_TRAP_WFI BIT(2) +#define REC_ENTER_TRAP_WFE BIT(3) +#define REC_ENTER_RIPAS_RESPONSE BIT(4) + +#define REC_RUN_GPRS 31 +#define REC_MAX_GIC_NUM_LRS 16 + +struct rec_enter { + union { /* 0x000 */ + u64 flags; + u8 padding0[0x200]; + }; + union { /* 0x200 */ + u64 gprs[REC_RUN_GPRS]; + u8 padding1[0x100]; + }; + union { /* 0x300 */ + struct { + u64 gicv3_hcr; + u64 gicv3_lrs[REC_MAX_GIC_NUM_LRS]; + }; + u8 padding2[0x100]; + }; + u8 padding3[0x400]; +}; + +#define RMI_EXIT_SYNC 0x00 +#define RMI_EXIT_IRQ 0x01 +#define RMI_EXIT_FIQ 0x02 +#define RMI_EXIT_PSCI 0x03 +#define RMI_EXIT_RIPAS_CHANGE 0x04 +#define RMI_EXIT_HOST_CALL 0x05 +#define RMI_EXIT_SERROR 0x06 + +struct rec_exit { + union { /* 0x000 */ + u8 exit_reason; + u8 padding0[0x100]; + }; + union { /* 0x100 */ + struct { + u64 esr; + u64 far; + u64 hpfar; + }; + u8 padding1[0x100]; + }; + union { /* 0x200 */ + u64 gprs[REC_RUN_GPRS]; + u8 padding2[0x100]; + }; + union { /* 0x300 */ + struct { + u64 gicv3_hcr; + u64 gicv3_lrs[REC_MAX_GIC_NUM_LRS]; + u64 gicv3_misr; + u64 gicv3_vmcr; + }; + u8 padding3[0x100]; + }; + union { /* 0x400 */ + struct { + u64 cntp_ctl; + u64 cntp_cval; + u64 cntv_ctl; + u64 cntv_cval; + }; + u8 padding4[0x100]; + }; + union { /* 0x500 */ + struct { + u64 
ripas_base; + u64 ripas_top; + u64 ripas_value; + }; + u8 padding5[0x100]; + }; + union { /* 0x600 */ + u16 imm; + u8 padding6[0x100]; + }; + union { /* 0x700 */ + struct { + u8 pmu_ovf_status; + }; + u8 padding7[0x100]; + }; +}; + +struct rec_run { + struct rec_enter enter; + struct rec_exit exit; +}; + +#endif -- Gitee From f587255bfe1828789935f07f1983b12408fa94ba Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 17 Nov 2021 16:14:28 +0000 Subject: [PATCH 08/52] arm64: RME: Add wrappers for RMI calls community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The wrappers make the call sites easier to read and deal with the boiler plate of handling the error codes from the RMM. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/rmi_cmds.h | 508 ++++++++++++++++++++++++++++++ 1 file changed, 508 insertions(+) create mode 100644 arch/arm64/include/asm/rmi_cmds.h diff --git a/arch/arm64/include/asm/rmi_cmds.h b/arch/arm64/include/asm/rmi_cmds.h new file mode 100644 index 0000000000000..043b7ff278eeb --- /dev/null +++ b/arch/arm64/include/asm/rmi_cmds.h @@ -0,0 +1,508 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RMI_CMDS_H +#define __ASM_RMI_CMDS_H + +#include + +#include + +struct rtt_entry { + unsigned long walk_level; + unsigned long desc; + int state; + int ripas; +}; + +/** + * rmi_data_create() - Create a data granule + * @rd: PA of the RD + * @data: PA of the target granule + * @ipa: IPA at which the granule will be mapped in the guest + * @src: PA of the source granule + * @flags: RMI_MEASURE_CONTENT if the contents should be measured + * + * Create a new data granule, copying contents from a non-secure granule. + * + * Return: RMI return code + */ +static inline int rmi_data_create(unsigned long rd, unsigned long data, + unsigned long ipa, unsigned long src, + unsigned long flags) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_DATA_CREATE, rd, data, ipa, src, + flags, &res); + + return res.a0; +} + +/** + * rmi_data_create_unknown() - Create a data granule with unknown contents + * @rd: PA of the RD + * @data: PA of the target granule + * @ipa: IPA at which the granule will be mapped in the guest + * + * Return: RMI return code + */ +static inline int rmi_data_create_unknown(unsigned long rd, + unsigned long data, + unsigned long ipa) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_DATA_CREATE_UNKNOWN, rd, data, ipa, &res); + + return res.a0; +} + +/** + * rmi_data_destroy() - Destroy a data granule + * @rd: PA of the RD + * @ipa: IPA at which the granule is mapped in the guest + * @data_out: PA of the granule which was destroyed + * @top_out: Top IPA of non-live RTT entries + * + * Unmap a protected IPA from stage 2, transitioning it to DESTROYED. + * The IPA cannot be used by the guest unless it is transitioned to RAM again + * by the realm guest. 
+ * + * Return: RMI return code + */ +static inline int rmi_data_destroy(unsigned long rd, unsigned long ipa, + unsigned long *data_out, + unsigned long *top_out) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_DATA_DESTROY, rd, ipa, &res); + + if (data_out) + *data_out = res.a1; + if (top_out) + *top_out = res.a2; + + return res.a0; +} + +/** + * rmi_features() - Read feature register + * @index: Feature register index + * @out: Feature register value is written to this pointer + * + * Return: RMI return code + */ +static inline int rmi_features(unsigned long index, unsigned long *out) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_FEATURES, index, &res); + + if (out) + *out = res.a1; + return res.a0; +} + +/** + * rmi_granule_delegate() - Delegate a granule + * @phys: PA of the granule + * + * Delegate a granule for use by the realm world. + * + * Return: RMI return code + */ +static inline int rmi_granule_delegate(unsigned long phys) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_GRANULE_DELEGATE, phys, &res); + + return res.a0; +} + +/** + * rmi_granule_undelegate() - Undelegate a granule + * @phys: PA of the granule + * + * Undelegate a granule to allow use by the normal world. Will fail if the + * granule is in use. + * + * Return: RMI return code + */ +static inline int rmi_granule_undelegate(unsigned long phys) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_GRANULE_UNDELEGATE, phys, &res); + + return res.a0; +} + +/** + * rmi_psci_complete() - Complete pending PSCI command + * @calling_rec: PA of the calling REC + * @target_rec: PA of the target REC + * @status: Status of the PSCI request + * + * Completes a pending PSCI command which was called with an MPIDR argument, by + * providing the corresponding REC. + * + * Return: RMI return code + */ +static inline int rmi_psci_complete(unsigned long calling_rec, + unsigned long target_rec, + unsigned long status) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_PSCI_COMPLETE, calling_rec, target_rec, + status, &res); + + return res.a0; +} + +/** + * rmi_realm_activate() - Activate a realm + * @rd: PA of the RD + * + * Mark a realm as Active signalling that creation is complete and allowing + * execution of the realm. + * + * Return: RMI return code + */ +static inline int rmi_realm_activate(unsigned long rd) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REALM_ACTIVATE, rd, &res); + + return res.a0; +} + +/** + * rmi_realm_create() - Create a realm + * @rd: PA of the RD + * @params_ptr: PA of realm parameters + * + * Create a new realm using the given parameters. + * + * Return: RMI return code + */ +static inline int rmi_realm_create(unsigned long rd, unsigned long params_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REALM_CREATE, rd, params_ptr, &res); + + return res.a0; +} + +/** + * rmi_realm_destroy() - Destroy a realm + * @rd: PA of the RD + * + * Destroys a realm; all objects belonging to the realm must be destroyed first. + * + * Return: RMI return code + */ +static inline int rmi_realm_destroy(unsigned long rd) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REALM_DESTROY, rd, &res); + + return res.a0; +} + +/** + * rmi_rec_aux_count() - Get number of auxiliary granules required + * @rd: PA of the RD + * @aux_count: Number of pages written to this pointer + * + * A REC may require extra auxiliary pages to be delegated for the RMM to + * store metadata (not visible to the normal world) in.
This function provides + * the number of pages that are required. + * + * Return: RMI return code + */ +static inline int rmi_rec_aux_count(unsigned long rd, unsigned long *aux_count) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REC_AUX_COUNT, rd, &res); + + if (aux_count) + *aux_count = res.a1; + return res.a0; +} + +/** + * rmi_rec_create() - Create a REC + * @rd: PA of the RD + * @rec: PA of the target REC + * @params_ptr: PA of REC parameters + * + * Create a REC using the parameters specified in the struct rec_params pointed + * to by @params_ptr. + * + * Return: RMI return code + */ +static inline int rmi_rec_create(unsigned long rd, unsigned long rec, + unsigned long params_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REC_CREATE, rd, rec, params_ptr, &res); + + return res.a0; +} + +/** + * rmi_rec_destroy() - Destroy a REC + * @rec: PA of the target REC + * + * Destroys a REC. The REC must not be running. + * + * Return: RMI return code + */ +static inline int rmi_rec_destroy(unsigned long rec) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REC_DESTROY, rec, &res); + + return res.a0; +} + +/** + * rmi_rec_enter() - Enter a REC + * @rec: PA of the target REC + * @run_ptr: PA of RecRun structure + * + * Starts (or continues) execution within a REC. + * + * Return: RMI return code + */ +static inline int rmi_rec_enter(unsigned long rec, unsigned long run_ptr) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_REC_ENTER, rec, run_ptr, &res); + + return res.a0; +} + +/** + * rmi_rtt_create() - Creates an RTT + * @rd: PA of the RD + * @rtt: PA of the target RTT + * @ipa: Base of the IPA range described by the RTT + * @level: Depth of the RTT within the tree + * + * Creates an RTT (Realm Translation Table) at the specified level for the + * translation of the specified address within the realm. + * + * Return: RMI return code + */ +static inline int rmi_rtt_create(unsigned long rd, unsigned long rtt, + unsigned long ipa, long level) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_CREATE, rd, rtt, ipa, level, &res); + + return res.a0; +} + +/** + * rmi_rtt_destroy() - Destroy an RTT + * @rd: PA of the RD + * @ipa: Base of the IPA range described by the RTT + * @level: Depth of the RTT within the tree + * @out_rtt: Pointer to write the PA of the RTT which was destroyed + * @out_top: Pointer to write the top IPA of non-live RTT entries + * + * Destroys an RTT. The RTT must be non-live, i.e. none of the entries in the + * table are in ASSIGNED or TABLE state. + * + * Return: RMI return code. + */ +static inline int rmi_rtt_destroy(unsigned long rd, + unsigned long ipa, + long level, + unsigned long *out_rtt, + unsigned long *out_top) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_DESTROY, rd, ipa, level, &res); + + if (out_rtt) + *out_rtt = res.a1; + if (out_top) + *out_top = res.a2; + + return res.a0; +} + +/** + * rmi_rtt_fold() - Fold an RTT + * @rd: PA of the RD + * @ipa: Base of the IPA range described by the RTT + * @level: Depth of the RTT within the tree + * @out_rtt: Pointer to write the PA of the RTT which was destroyed + * + * Folds an RTT. If all entries within the RTT are 'homogeneous' the RTT can be + * folded into the parent and the RTT destroyed.
+ * + * Return: RMI return code + */ +static inline int rmi_rtt_fold(unsigned long rd, unsigned long ipa, + long level, unsigned long *out_rtt) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_FOLD, rd, ipa, level, &res); + + if (out_rtt) + *out_rtt = res.a1; + + return res.a0; +} + +/** + * rmi_rtt_init_ripas() - Set RIPAS for new realm + * @rd: PA of the RD + * @base: Base of target IPA region + * @top: Top of target IPA region + * @out_top: Top IPA of range whose RIPAS was modified + * + * Sets the RIPAS of a target IPA range to RAM, for a realm in the NEW state. + * + * Return: RMI return code + */ +static inline int rmi_rtt_init_ripas(unsigned long rd, unsigned long base, + unsigned long top, unsigned long *out_top) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_INIT_RIPAS, rd, base, top, &res); + + if (out_top) + *out_top = res.a1; + + return res.a0; +} + +/** + * rmi_rtt_map_unprotected() - Map NS pages into a realm + * @rd: PA of the RD + * @ipa: Base IPA of the mapping + * @level: Depth within the RTT tree + * @desc: RTTE descriptor + * + * Create a mapping from an Unprotected IPA to a Non-secure PA. + * + * Return: RMI return code + */ +static inline int rmi_rtt_map_unprotected(unsigned long rd, + unsigned long ipa, + long level, + unsigned long desc) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_MAP_UNPROTECTED, rd, ipa, level, + desc, &res); + + return res.a0; +} + +/** + * rmi_rtt_read_entry() - Read an RTTE + * @rd: PA of the RD + * @ipa: IPA for which to read the RTTE + * @level: RTT level at which to read the RTTE + * @rtt: Output structure describing the RTTE + * + * Reads a RTTE (Realm Translation Table Entry). + * + * Return: RMI return code + */ +static inline int rmi_rtt_read_entry(unsigned long rd, unsigned long ipa, + long level, struct rtt_entry *rtt) +{ + struct arm_smccc_1_2_regs regs = { + SMC_RMI_RTT_READ_ENTRY, + rd, ipa, level + }; + + arm_smccc_1_2_smc(&regs, &regs); + + rtt->walk_level = regs.a1; + rtt->state = regs.a2 & 0xFF; + rtt->desc = regs.a3; + rtt->ripas = regs.a4 & 0xFF; + + return regs.a0; +} + +/** + * rmi_rtt_set_ripas() - Set RIPAS for a running realm + * @rd: PA of the RD + * @rec: PA of the REC making the request + * @base: Base of target IPA region + * @top: Top of target IPA region + * @out_top: Pointer to write top IPA of range whose RIPAS was modified + * + * Completes a request made by the realm to change the RIPAS of a target IPA + * range. + * + * Return: RMI return code + */ +static inline int rmi_rtt_set_ripas(unsigned long rd, unsigned long rec, + unsigned long base, unsigned long top, + unsigned long *out_top) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_SET_RIPAS, rd, rec, base, top, &res); + + if (out_top) + *out_top = res.a1; + + return res.a0; +} + +/** + * rmi_rtt_unmap_unprotected() - Remove a NS mapping + * @rd: PA of the RD + * @ipa: Base IPA of the mapping + * @level: Depth within the RTT tree + * @out_top: Pointer to write top IPA of non-live RTT entries + * + * Removes a mapping at an Unprotected IPA.
+ * + * Return: RMI return code + */ +static inline int rmi_rtt_unmap_unprotected(unsigned long rd, + unsigned long ipa, + long level, + unsigned long *out_top) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RMI_RTT_UNMAP_UNPROTECTED, rd, ipa, + level, &res); + + if (out_top) + *out_top = res.a1; + + return res.a0; +} + +#endif /* __ASM_RMI_CMDS_H */ -- Gitee From 672698fe29e794cbd0e6c96dccfbc16c6581da9b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 27 Sep 2021 14:52:31 +0100 Subject: [PATCH 09/52] arm64: RME: Check for RME support at KVM init community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Query the RMI version number and check if it is a compatible version. A static key is also provided to signal that a supported RMM is available. Functions are provided to query if a VM or VCPU is a realm (or rec) which currently will always return false. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_emulate.h | 18 +++++++++ arch/arm64/include/asm/kvm_host.h | 3 ++ arch/arm64/include/asm/kvm_rme.h | 56 ++++++++++++++++++++++++++++ arch/arm64/include/asm/virt.h | 1 + arch/arm64/kvm/Makefile | 3 +- arch/arm64/kvm/arm.c | 6 +++ arch/arm64/kvm/rme.c | 50 +++++++++++++++++++++++++ 7 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/kvm_rme.h create mode 100644 arch/arm64/kvm/rme.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3aa1c7f56f655..e5516a80f4420 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -642,4 +642,22 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) kvm_write_cptr_el2(val); } + +static inline bool kvm_is_realm(struct kvm *kvm) +{ + if (static_branch_unlikely(&kvm_rme_is_available) && kvm) + return kvm->arch.is_realm; + return false; +} + +static inline enum realm_state kvm_realm_state(struct kvm *kvm) +{ + return READ_ONCE(kvm->arch.realm.state); +} + +static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu) +{ + return false; +} + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 80276f048aef9..d172503516d86 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_HISI_VIRTCCA_HOST #include #endif @@ -285,6 +286,8 @@ struct kvm_arch { */ struct kvm_protected_vm pkvm; + bool is_realm; + struct realm realm; #ifdef CONFIG_KVM_HISI_VIRT spinlock_t sched_lock; cpumask_var_t sched_cpus; /* Union of all vcpu's cpus_ptr */ diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h new file mode 100644 index 0000000000000..69af5c3a1e44d --- /dev/null +++ b/arch/arm64/include/asm/kvm_rme.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_KVM_RME_H +#define __ASM_KVM_RME_H + +/** + * enum realm_state - State of a Realm + */ +enum realm_state { + /** + * @REALM_STATE_NONE: + * Realm has not yet been created. rmi_realm_create() may be + * called to create the realm. + */ + REALM_STATE_NONE, + /** + * @REALM_STATE_NEW: + * Realm is under construction, not eligible for execution. Pages + * may be populated with rmi_data_create(). 
+ */ + REALM_STATE_NEW, + /** + * @REALM_STATE_ACTIVE: + * Realm has been created and is eligible for execution with + * rmi_rec_enter(). Pages may no longer be populated with + * rmi_data_create(). + */ + REALM_STATE_ACTIVE, + /** + * @REALM_STATE_DYING: + * Realm is in the process of being destroyed or has already been + * destroyed. + */ + REALM_STATE_DYING, + /** + * @REALM_STATE_DEAD: + * Realm has been destroyed. + */ + REALM_STATE_DEAD +}; + +/** + * struct realm - Additional per VM data for a Realm + * + * @state: The lifetime state machine for the realm + */ +struct realm { + enum realm_state state; +}; + +void kvm_init_rme(void); + +#endif diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 261d6e9df2e10..12cf36c381890 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -81,6 +81,7 @@ void __hyp_reset_vectors(void); bool is_kvm_arm_initialised(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +DECLARE_STATIC_KEY_FALSE(kvm_rme_is_available); /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 952eee572e234..3a704db002919 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -20,7 +20,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ - vgic/vgic-its.o vgic/vgic-debug.o + vgic/vgic-its.o vgic/vgic-debug.o \ + rme.o kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c7df79fb40823..03236c4178383 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,8 @@ static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; #include "hisilicon/hisi_virt.h" +DEFINE_STATIC_KEY_FALSE(kvm_rme_is_available); + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); @@ -2692,6 +2695,9 @@ static __init int kvm_arm_init(void) in_hyp_mode = is_kernel_in_hyp_mode(); + if (in_hyp_mode) + kvm_init_rme(); + if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c new file mode 100644 index 0000000000000..b88269b80c116 --- /dev/null +++ b/arch/arm64/kvm/rme.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Ltd. 
+ */ + +#include + +#include +#include + +static int rmi_check_version(void) +{ + struct arm_smccc_res res; + int version_major, version_minor; + unsigned long host_version = RMI_ABI_VERSION(RMI_ABI_MAJOR_VERSION, + RMI_ABI_MINOR_VERSION); + + arm_smccc_1_1_invoke(SMC_RMI_VERSION, host_version, &res); + + if (res.a0 == SMCCC_RET_NOT_SUPPORTED) + return -ENXIO; + + version_major = RMI_ABI_VERSION_GET_MAJOR(res.a1); + version_minor = RMI_ABI_VERSION_GET_MINOR(res.a1); + + if (res.a0 != RMI_SUCCESS) { + kvm_err("Unsupported RMI ABI (v%d.%d) we want v%d.%d\n", + version_major, version_minor, + RMI_ABI_MAJOR_VERSION, + RMI_ABI_MINOR_VERSION); + return -ENXIO; + } + + kvm_info("RMI ABI version %d.%d\n", version_major, version_minor); + + return 0; +} + +void kvm_init_rme(void) +{ + if (PAGE_SIZE != SZ_4K) + /* Only 4k page size on the host is supported */ + return; + + if (rmi_check_version()) + /* Continue without realm support */ + return; + + /* Future patch will enable static branch kvm_rme_is_available */ +} -- Gitee From 4b95afe367b49acaa8b922bf7e6dd2f72865f7ca Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 30 Sep 2021 15:49:43 +0100 Subject: [PATCH 10/52] arm64: RME: Define the user ABI community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- There is one (multiplexed) CAP which can be used to create, populate and then activate the realm. Co-developed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- Documentation/virt/kvm/api.rst | 1 + arch/arm64/include/uapi/asm/kvm.h | 49 +++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 12 ++++++++ 3 files changed, 62 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 17811002a8f7c..6aaac32abd399 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5061,6 +5061,7 @@ Recognised values for feature: ===== =========================================== arm64 KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE) + arm64 KVM_ARM_VCPU_REC (requires KVM_CAP_ARM_RME) ===== =========================================== Finalizes the configuration of the specified vcpu feature. 
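To make the multiplexed CAP concrete, a rough userspace sketch follows; it is illustrative only (the helper name, vm_fd and the choice of SHA-256 are assumptions, not part of this patch):

	#include <err.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Hypothetical VMM helper: configure the measurement hash
	 * algorithm before the realm descriptor is created. */
	static void configure_realm_hash(int vm_fd)
	{
		struct kvm_cap_arm_rme_config_item cfg = {
			.cfg = KVM_CAP_ARM_RME_CFG_HASH_ALGO,
			.hash_algo = KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256,
		};
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_ARM_RME,
			.args = { KVM_CAP_ARM_RME_CONFIG_REALM,
				  (__u64)(unsigned long)&cfg },
		};

		if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
			err(1, "KVM_CAP_ARM_RME_CONFIG_REALM");
	}

The same KVM_ENABLE_CAP mechanism, with args[0] set to KVM_CAP_ARM_RME_CREATE_RD and later KVM_CAP_ARM_RME_ACTIVATE_REALM, then steps the realm through creation and activation.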
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f7ddd73a8c0fa..bd3033ab139df 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -110,6 +110,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_REC 8 /* VCPU REC state as part of Realm */ struct kvm_vcpu_init { __u32 target; @@ -415,6 +416,54 @@ enum { #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 +/* KVM_CAP_ARM_RME on VM fd */ +#define KVM_CAP_ARM_RME_CONFIG_REALM 0 +#define KVM_CAP_ARM_RME_CREATE_RD 1 +#define KVM_CAP_ARM_RME_INIT_IPA_REALM 2 +#define KVM_CAP_ARM_RME_POPULATE_REALM 3 +#define KVM_CAP_ARM_RME_ACTIVATE_REALM 4 + +#define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256 0 +#define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512 1 + +#define KVM_CAP_ARM_RME_RPV_SIZE 64 + +/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ +#define KVM_CAP_ARM_RME_CFG_RPV 0 +#define KVM_CAP_ARM_RME_CFG_HASH_ALGO 1 + +struct kvm_cap_arm_rme_config_item { + __u32 cfg; + union { + /* cfg == KVM_CAP_ARM_RME_CFG_RPV */ + struct { + __u8 rpv[KVM_CAP_ARM_RME_RPV_SIZE]; + }; + + /* cfg == KVM_CAP_ARM_RME_CFG_HASH_ALGO */ + struct { + __u32 hash_algo; + }; + + /* Fix the size of the union */ + __u8 reserved[256]; + }; +}; + +#define KVM_ARM_RME_POPULATE_FLAGS_MEASURE BIT(0) +struct kvm_cap_arm_rme_populate_realm_args { + __u64 populate_ipa_base; + __u64 populate_ipa_size; + __u32 flags; + __u32 reserved[3]; +}; + +struct kvm_cap_arm_rme_init_ipa_args { + __u64 init_ipa_base; + __u64 init_ipa_size; + __u32 reserved[4]; +}; + /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7e9fa9b5ce78c..0d3043f014389 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1201,6 +1201,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_ARM_RME 300 /* FIXME: Large number to prevent conflicts */ + #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 /* support userspace to request firmware to build CSV3 guest's memory space */ @@ -2432,4 +2434,14 @@ struct kvm_csv3_handle_memory { /* get tmi version */ #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, u64) +/* Available with KVM_CAP_ARM_RME, only for VMs with KVM_VM_TYPE_ARM_REALM */ +struct kvm_arm_rmm_psci_complete { + __u64 target_mpidr; + __u32 psci_status; + __u32 padding[3]; +}; + +/* FIXME: Update nr (0xd2) when merging */ +#define KVM_ARM_VCPU_RMM_PSCI_COMPLETE _IOW(KVMIO, 0xd2, struct kvm_arm_rmm_psci_complete) + #endif /* __LINUX_KVM_H */ -- Gitee From 314abb7d9db3c81431037adcf089fa277f50f972 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 17 Nov 2021 11:23:26 +0000 Subject: [PATCH 11/52] arm64: RME: ioctls to create and configure realms community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. 
This involves delegating pages to the RMM to hold the Realm Descriptor (RD) and for the base level of the Realm Translation Tables (RTT). A VMID also needs to be picked; since the RMM has a separate VMID address space, a dedicated allocator is added for this purpose. KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm before it is created. Configuration options can be classified as: 1. Parameters specific to the Realm stage2 (e.g. IPA Size, vmid, stage2 entry level, entry level RTTs, number of RTTs in start level, LPA2) Most of these are not measured by the RMM and come from KVM book-keeping. 2. Parameters controlling "Arm Architecture features for the VM". (e.g. SVE VL, PMU counters, number of HW BRPs/WPs), configured by the VMM using the "user ID register write" mechanism. These will be supported in later patches. 3. Parameters that are not part of the core Arm architecture but defined by the RMM spec (e.g. Hash algorithm for measurement, Personalisation value). These are programmed via KVM_CAP_ARM_RME_CONFIG_REALM. For the IPA size there is the possibility that the RMM supports a different size to the IPA size supported by KVM for normal guests. At the moment the 'normal limit' is exposed by KVM_CAP_ARM_VM_IPA_SIZE and the IPA size is configured by the bottom bits of vm_type in KVM_CREATE_VM. This means that it isn't easy for the VMM to discover what IPA sizes are supported for Realm guests. Since the IPA is part of the measurement of the realm guest, the current expectation is that the VMM will be required to pick the IPA size demanded by attestation, and therefore simply failing if this isn't available is fine. An option would be to expose a new capability ioctl to obtain the RMM's maximum IPA size if this is needed in the future. Co-developed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Conflicts: arch/arm64/kvm/rme.c arch/arm64/kvm/mmu.c [zhuangyiwei: use kvm->arch.vtcr to access vtcr instead of kvm->arch.mmu.vtcr which is not changed in OLK-6.6] [zhuangyiwei: do not extract part of kvm_init_stage2_mmu to a new function kvm_init_ipa_range and directly add codes in kvm_init_stage2_mmu] Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_emulate.h | 5 + arch/arm64/include/asm/kvm_rme.h | 19 ++ arch/arm64/kvm/arm.c | 18 ++ arch/arm64/kvm/mmu.c | 17 ++ arch/arm64/kvm/rme.c | 296 +++++++++++++++++++++++++++ 5 files changed, 355 insertions(+) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index e5516a80f4420..dce57feeaae56 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -655,6 +655,11 @@ static inline enum realm_state kvm_realm_state(struct kvm *kvm) return READ_ONCE(kvm->arch.realm.state); } +static inline bool kvm_realm_is_created(struct kvm *kvm) +{ + return kvm_is_realm(kvm) && kvm_realm_state(kvm) != REALM_STATE_NONE; +} + static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 69af5c3a1e44d..209cd99f03dd1 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -6,6 +6,8 @@ #ifndef __ASM_KVM_RME_H #define __ASM_KVM_RME_H +#include + /** * enum realm_state - State of a Realm */ @@ -46,11 +48,28 @@ enum realm_state { * struct realm - Additional per VM data for a Realm * * @state: The lifetime state machine for the realm + * @rd: Kernel mapping of the Realm Descriptor (RD) + * @params: Parameters for the
RMI_REALM_CREATE command + * @num_aux: The number of auxiliary pages required by the RMM + * @vmid: VMID to be used by the RMM for the realm + * @ia_bits: Number of valid Input Address bits in the IPA */ struct realm { enum realm_state state; + + void *rd; + struct realm_params *params; + + unsigned long num_aux; + unsigned int vmid; + unsigned int ia_bits; }; void kvm_init_rme(void); +u32 kvm_realm_ipa_limit(void); + +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); +int kvm_init_realm_vm(struct kvm *kvm); +void kvm_destroy_realm(struct kvm *kvm); #endif diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 03236c4178383..aa3a89c794d5f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -245,6 +245,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, } mutex_unlock(&kvm->slots_lock); break; + case KVM_CAP_ARM_RME: + if (!kvm_is_realm(kvm)) + return -EINVAL; + mutex_lock(&kvm->lock); + r = kvm_realm_enable_cap(kvm, cap); + mutex_unlock(&kvm->lock); + break; #ifdef CONFIG_ARM64_HDBSS case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: r = kvm_cap_arm_enable_hdbss(kvm, cap); @@ -317,6 +324,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES); + /* Initialise the realm bits after the generic bits are enabled */ + if (kvm_is_realm(kvm)) { + ret = kvm_init_realm_vm(kvm); + if (ret) + goto err_free_cpumask; + } + return 0; err_free_cpumask: @@ -353,6 +367,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_unshare_hyp(kvm, kvm + 1); kvm_arm_teardown_hypercalls(kvm); + kvm_destroy_realm(kvm); } extern struct static_key_false ipiv_enable; @@ -462,6 +477,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: r = kvm_supported_block_sizes(); break; + case KVM_CAP_ARM_RME: + r = static_key_enabled(&kvm_rme_is_available); + break; #ifdef CONFIG_VIRT_PLAT_DEV case KVM_CAP_ARM_VIRT_MSI_BYPASS: r = sdev_enable; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 429ba7ab0721c..39f1b64983367 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -873,6 +873,9 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t u64 mmfr0, mmfr1; u32 phys_shift; + if (kvm_is_realm(kvm)) + kvm_ipa_limit = kvm_realm_ipa_limit(); + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) return -EINVAL; @@ -1014,6 +1017,20 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) struct kvm_pgtable *pgt = NULL; write_lock(&kvm->mmu_lock); + if (kvm_is_realm(kvm) && + (kvm_realm_state(kvm) != REALM_STATE_DEAD && + kvm_realm_state(kvm) != REALM_STATE_NONE)) { + /* Tearing down RTTs will be added in a later patch */ + write_unlock(&kvm->mmu_lock); + + /* + * The physical PGD pages are delegated to the RMM, so cannot + * be freed at this point. This function will be called again + * from kvm_destroy_realm() after the physical pages have been + * returned at which point the memory can be freed. 
+ */ + return; + } pgt = mmu->pgt; if (pgt) { mmu->pgd_phys = 0; diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index b88269b80c116..355bc1773c4f5 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -5,9 +5,20 @@ #include +#include +#include #include #include +#include + +static unsigned long rmm_feat_reg0; + +static bool rme_supports(unsigned long feature) +{ + return !!u64_get_bits(rmm_feat_reg0, feature); +} + static int rmi_check_version(void) { struct arm_smccc_res res; @@ -36,6 +47,285 @@ static int rmi_check_version(void) return 0; } +u32 kvm_realm_ipa_limit(void) +{ + return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ); +} + +static int get_start_level(struct realm *realm) +{ + return 4 - stage2_pgtable_levels(realm->ia_bits); +} + +static void free_delegated_granule(phys_addr_t phys) +{ + if (WARN_ON(rmi_granule_undelegate(phys))) { + /* Undelegate failed: leak the page */ + return; + } + + free_page((unsigned long)phys_to_virt(phys)); +} + +static int realm_create_rd(struct kvm *kvm) +{ + struct realm *realm = &kvm->arch.realm; + struct realm_params *params = realm->params; + void *rd = NULL; + phys_addr_t rd_phys, params_phys; + size_t pgd_size = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); + int i, r; + + if (WARN_ON(realm->rd) || WARN_ON(!realm->params)) + return -EEXIST; + + rd = (void *)__get_free_page(GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd_phys = virt_to_phys(rd); + if (rmi_granule_delegate(rd_phys)) { + r = -ENXIO; + goto free_rd; + } + + for (i = 0; i < pgd_size; i += PAGE_SIZE) { + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i; + + if (rmi_granule_delegate(pgd_phys)) { + r = -ENXIO; + goto out_undelegate_tables; + } + } + + realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); + + params->s2sz = VTCR_EL2_IPA(kvm->arch.vtcr); + params->rtt_level_start = get_start_level(realm); + params->rtt_num_start = pgd_size / PAGE_SIZE; + params->rtt_base = kvm->arch.mmu.pgd_phys; + params->vmid = realm->vmid; + + params_phys = virt_to_phys(params); + + if (rmi_realm_create(rd_phys, params_phys)) { + r = -ENXIO; + goto out_undelegate_tables; + } + + if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) { + WARN_ON(rmi_realm_destroy(rd_phys)); + goto out_undelegate_tables; + } + + realm->rd = rd; + + return 0; + +out_undelegate_tables: + while (--i >= 0) { + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE; + + if (WARN_ON(rmi_granule_undelegate(pgd_phys))) { + /* Leak the pages if they cannot be returned */ + kvm->arch.mmu.pgt = NULL; + break; + } + } + if (WARN_ON(rmi_granule_undelegate(rd_phys))) { + /* Leak the page if it isn't returned */ + return r; + } +free_rd: + free_page((unsigned long)rd); + return r; +} + +/* Protects access to rme_vmid_bitmap */ +static DEFINE_SPINLOCK(rme_vmid_lock); +static unsigned long *rme_vmid_bitmap; + +static int rme_vmid_init(void) +{ + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); + + rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL); + if (!rme_vmid_bitmap) { + kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__); + return -ENOMEM; + } + + return 0; +} + +static int rme_vmid_reserve(void) +{ + int ret; + unsigned int vmid_count = 1 << kvm_get_vmid_bits(); + + spin_lock(&rme_vmid_lock); + ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0); + spin_unlock(&rme_vmid_lock); + + return ret; +} + +static void rme_vmid_release(unsigned int vmid) +{ + spin_lock(&rme_vmid_lock); + bitmap_release_region(rme_vmid_bitmap, vmid, 0); + spin_unlock(&rme_vmid_lock); +} + 
+static int kvm_create_realm(struct kvm *kvm) +{ + struct realm *realm = &kvm->arch.realm; + int ret; + + if (!kvm_is_realm(kvm)) + return -EINVAL; + if (kvm_realm_is_created(kvm)) + return -EEXIST; + + ret = rme_vmid_reserve(); + if (ret < 0) + return ret; + realm->vmid = ret; + + ret = realm_create_rd(kvm); + if (ret) { + rme_vmid_release(realm->vmid); + return ret; + } + + WRITE_ONCE(realm->state, REALM_STATE_NEW); + + /* The realm is up, free the parameters. */ + free_page((unsigned long)realm->params); + realm->params = NULL; + + return 0; +} + +static int config_realm_hash_algo(struct realm *realm, + struct kvm_cap_arm_rme_config_item *cfg) +{ + switch (cfg->hash_algo) { + case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256: + if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256)) + return -EINVAL; + break; + case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512: + if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512)) + return -EINVAL; + break; + default: + return -EINVAL; + } + realm->params->hash_algo = cfg->hash_algo; + return 0; +} + +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + struct kvm_cap_arm_rme_config_item cfg; + struct realm *realm = &kvm->arch.realm; + int r = 0; + + if (kvm_realm_is_created(kvm)) + return -EBUSY; + + if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg))) + return -EFAULT; + + switch (cfg.cfg) { + case KVM_CAP_ARM_RME_CFG_RPV: + memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv)); + break; + case KVM_CAP_ARM_RME_CFG_HASH_ALGO: + r = config_realm_hash_algo(realm, &cfg); + break; + default: + r = -EINVAL; + } + + return r; +} + +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r = 0; + + if (!kvm_is_realm(kvm)) + return -EINVAL; + + switch (cap->args[0]) { + case KVM_CAP_ARM_RME_CONFIG_REALM: + r = kvm_rme_config_realm(kvm, cap); + break; + case KVM_CAP_ARM_RME_CREATE_RD: + r = kvm_create_realm(kvm); + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +void kvm_destroy_realm(struct kvm *kvm) +{ + struct realm *realm = &kvm->arch.realm; + size_t pgd_size = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); + int i; + + if (realm->params) { + free_page((unsigned long)realm->params); + realm->params = NULL; + } + + if (!kvm_realm_is_created(kvm)) + return; + + WRITE_ONCE(realm->state, REALM_STATE_DYING); + + if (realm->rd) { + phys_addr_t rd_phys = virt_to_phys(realm->rd); + + if (WARN_ON(rmi_realm_destroy(rd_phys))) + return; + free_delegated_granule(rd_phys); + realm->rd = NULL; + } + + rme_vmid_release(realm->vmid); + + for (i = 0; i < pgd_size; i += PAGE_SIZE) { + phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i; + + if (WARN_ON(rmi_granule_undelegate(pgd_phys))) + return; + } + + WRITE_ONCE(realm->state, REALM_STATE_DEAD); + + /* Now that the Realm is destroyed, free the entry level RTTs */ + kvm_free_stage2_pgd(&kvm->arch.mmu); +} + +int kvm_init_realm_vm(struct kvm *kvm) +{ + struct realm_params *params; + + params = (struct realm_params *)get_zeroed_page(GFP_KERNEL); + if (!params) + return -ENOMEM; + + kvm->arch.realm.params = params; + return 0; +} + void kvm_init_rme(void) { if (PAGE_SIZE != SZ_4K) @@ -46,5 +336,11 @@ void kvm_init_rme(void) /* Continue without realm support */ return; + if (WARN_ON(rmi_features(0, &rmm_feat_reg0))) + return; + + if (rme_vmid_init()) + return; + /* Future patch will enable static branch kvm_rme_is_available */ } -- Gitee From ac127d00889a351ba5ba6e166562916a06c1a151 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 3 Aug 2023 
11:20:27 +0100 Subject: [PATCH 12/52] kvm: arm64: Expose debug HW register numbers for Realm community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Expose VM specific Debug HW register numbers. Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index aa3a89c794d5f..ed3ac121d3290 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -77,6 +77,22 @@ bool is_kvm_arm_initialised(void) return kvm_arm_initialised; } +static u32 kvm_arm_get_num_brps(struct kvm *kvm) +{ + if (!kvm_is_realm(kvm)) + return get_num_brps(); + /* Realm guest is not debuggable. */ + return 0; +} + +static u32 kvm_arm_get_num_wrps(struct kvm *kvm) +{ + if (!kvm_is_realm(kvm)) + return get_num_wrps(); + /* Realm guest is not debuggable. */ + return 0; +} + #ifdef CONFIG_ARM64_TWED bool twed_enable; module_param(twed_enable, bool, 0644); @@ -394,7 +410,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: case KVM_CAP_ARM_NISV_TO_USER: case KVM_CAP_ARM_INJECT_EXT_DABT: - case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: case KVM_CAP_ARM_SYSTEM_SUSPEND: @@ -402,6 +417,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_COUNTER_OFFSET: r = 1; break; + case KVM_CAP_SET_GUEST_DEBUG: + r = !kvm_is_realm(kvm); + break; case KVM_CAP_SET_GUEST_DEBUG2: return KVM_GUESTDBG_VALID_MASK; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -447,10 +465,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); break; case KVM_CAP_GUEST_DEBUG_HW_BPS: - r = get_num_brps(); + r = kvm_arm_get_num_brps(kvm); break; case KVM_CAP_GUEST_DEBUG_HW_WPS: - r = get_num_wrps(); + r = kvm_arm_get_num_wrps(kvm); break; case KVM_CAP_ARM_PMU_V3: r = kvm_arm_support_pmu_v3(); -- Gitee From 55d9ca1c9aee1fbebe088c1193eab283b5efc501 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 6 Oct 2021 11:47:49 +0100 Subject: [PATCH 13/52] arm64: kvm: Allow passing machine type in KVM creation community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Previously machine type was used purely for specifying the physical address size of the guest. Reserve the higher bits to specify an ARM specific machine type and declare a new type 'KVM_VM_TYPE_ARM_REALM' used to create a realm guest. 
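As an illustration of the intended usage, here is a sketch under assumptions (the 40-bit IPA request and the error handling are placeholders, not mandated by this patch):

	#include <err.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Hypothetical VMM snippet: create a realm VM with an explicit
	 * 40-bit IPA size encoded in the low bits of the machine type. */
	static int create_realm_vm(void)
	{
		int kvm_fd = open("/dev/kvm", O_RDWR);
		int vm_fd;

		if (kvm_fd < 0)
			err(1, "/dev/kvm");

		vm_fd = ioctl(kvm_fd, KVM_CREATE_VM,
			      KVM_VM_TYPE_ARM_REALM | KVM_VM_TYPE_ARM_IPA_SIZE(40));
		if (vm_fd < 0)
			err(1, "KVM_CREATE_VM"); /* no realm support, or bad IPA size */

		return vm_fd;
	}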
Reviewed-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 17 +++++++++++++++++ arch/arm64/kvm/mmu.c | 3 --- include/uapi/linux/kvm.h | 19 +++++++++++++++---- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ed3ac121d3290..dacea66bc6731 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -308,6 +308,23 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_unlock(&kvm->lock); #endif + if (type & ~(KVM_VM_TYPE_ARM_MASK | KVM_VM_TYPE_ARM_IPA_SIZE_MASK)) + return -EINVAL; + + switch (type & KVM_VM_TYPE_ARM_MASK) { + case KVM_VM_TYPE_ARM_NORMAL: + break; + case KVM_VM_TYPE_ARM_REALM: + kvm->arch.is_realm = true; + if (!kvm_is_realm(kvm)) { + /* Realm support unavailable */ + return -EINVAL; + } + break; + default: + return -EINVAL; + } + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 39f1b64983367..c1783ad0b74f2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -876,9 +876,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t if (kvm_is_realm(kvm)) kvm_ipa_limit = kvm_realm_ipa_limit(); - if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) - return -EINVAL; - phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); if (is_protected_kvm_enabled()) { phys_shift = kvm_ipa_limit; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d3043f014389..1229996569eb8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -919,14 +919,25 @@ struct kvm_ppc_resize_hpt { #define KVM_S390_SIE_PAGE_OFFSET 1 /* - * On arm64, machine type can be used to request the physical - * address size for the VM. Bits[7-0] are reserved for the guest - * PA size shift (i.e, log2(PA_Size)). For backward compatibility, - * value 0 implies the default IPA size, 40bits. + * On arm64, machine type can be used to request both the machine type and + * the physical address size for the VM. + * + * Bits[11-8] are reserved for the ARM specific machine type. + * + * Bits[7-0] are reserved for the guest PA size shift (i.e, log2(PA_Size)). + * For backward compatibility, value 0 implies the default IPA size, 40bits. */ +#define KVM_VM_TYPE_ARM_SHIFT 8 +#define KVM_VM_TYPE_ARM_MASK (0xfULL << KVM_VM_TYPE_ARM_SHIFT) +#define KVM_VM_TYPE_ARM(_type) \ + (((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK) +#define KVM_VM_TYPE_ARM_NORMAL KVM_VM_TYPE_ARM(0) +#define KVM_VM_TYPE_ARM_REALM KVM_VM_TYPE_ARM(1) + #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + /* * ioctls for /dev/kvm fds: */ -- Gitee From 0939d6a11e9fc0719fd72c95b0cec07f19ee7e79 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 13 Oct 2021 16:11:19 +0100 Subject: [PATCH 14/52] arm64: RME: RTT tear down community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM owns the stage 2 page tables for a realm, and KVM must request that the RMM creates/destroys entries as necessary. The physical pages to store the page tables are delegated to the realm as required, and can be undelegated when no longer used. Creating new RTTs is the easy part, tearing down is a little more tricky. 
The result of realm_rtt_destroy() can be used to effectively walk the tree and destroy the entries (undelegating pages that were given to the realm). Signed-off-by: Steven Price Reviewed-by: Suzuki K Poulose Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 19 ++++++ arch/arm64/kvm/mmu.c | 6 +- arch/arm64/kvm/rme.c | 112 +++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 209cd99f03dd1..32bdedf1d8667 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -71,5 +71,24 @@ u32 kvm_realm_ipa_limit(void); int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_init_realm_vm(struct kvm *kvm); void kvm_destroy_realm(struct kvm *kvm); +void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); + +#define RMM_RTT_BLOCK_LEVEL 2 +#define RMM_RTT_MAX_LEVEL 3 + +#define RMM_PAGE_SHIFT 12 +#define RMM_PAGE_SIZE BIT(RMM_PAGE_SHIFT) +/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ +#define RMM_RTT_LEVEL_SHIFT(l) \ + ((RMM_PAGE_SHIFT - 3) * (4 - (l)) + 3) +#define RMM_L2_BLOCK_SIZE BIT(RMM_RTT_LEVEL_SHIFT(2)) + +static inline unsigned long rme_rtt_level_mapsize(int level) +{ + if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) + return RMM_PAGE_SIZE; + + return (1UL << RMM_RTT_LEVEL_SHIFT(level)); +} #endif diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c1783ad0b74f2..5e0f9f3bcf6fd 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1011,14 +1011,15 @@ void stage2_unmap_vm(struct kvm *kvm) void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); - struct kvm_pgtable *pgt = NULL; + struct kvm_pgtable *pgt; write_lock(&kvm->mmu_lock); + pgt = mmu->pgt; if (kvm_is_realm(kvm) && (kvm_realm_state(kvm) != REALM_STATE_DEAD && kvm_realm_state(kvm) != REALM_STATE_NONE)) { - /* Tearing down RTTs will be added in a later patch */ write_unlock(&kvm->mmu_lock); + kvm_realm_destroy_rtts(kvm, pgt->ia_bits); /* * The physical PGD pages are delegated to the RMM, so cannot @@ -1028,7 +1029,6 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) */ return; } - pgt = mmu->pgt; if (pgt) { mmu->pgd_phys = 0; mmu->pgt = NULL; diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 355bc1773c4f5..567f6b57f66fb 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -141,6 +141,118 @@ static int realm_create_rd(struct kvm *kvm) return r; } +static int realm_rtt_destroy(struct realm *realm, unsigned long addr, + int level, phys_addr_t *rtt_granule, + unsigned long *next_addr) +{ + unsigned long out_rtt; + int ret; + + ret = rmi_rtt_destroy(virt_to_phys(realm->rd), addr, level, + &out_rtt, next_addr); + + *rtt_granule = out_rtt; + + return ret; +} + +static int realm_tear_down_rtt_level(struct realm *realm, int level, + unsigned long start, unsigned long end) +{ + ssize_t map_size; + unsigned long addr, next_addr; + + if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) + return -EINVAL; + + map_size = rme_rtt_level_mapsize(level - 1); + + for (addr = start; addr < end; addr = next_addr) { + phys_addr_t rtt_granule; + int ret; + unsigned long align_addr = ALIGN(addr, map_size); + + next_addr = ALIGN(addr + 1, map_size); + + if (next_addr > end || align_addr != addr) { + /* + * The target range is smaller than what this level + * covers, recurse deeper. 
+ */ + ret = realm_tear_down_rtt_level(realm, + level + 1, + addr, + min(next_addr, end)); + if (ret) + return ret; + continue; + } + + ret = realm_rtt_destroy(realm, addr, level, + &rtt_granule, &next_addr); + + switch (RMI_RETURN_STATUS(ret)) { + case RMI_SUCCESS: + free_delegated_granule(rtt_granule); + break; + case RMI_ERROR_RTT: + if (next_addr > addr) { + /* Missing RTT, skip */ + break; + } + /* + * We tear down the RTT range for the full IPA + * space, after everything is unmapped. Also we + * descend down only if we cannot tear down a + * top level RTT. Thus RMM must be able to walk + * to the requested level. e.g., a block mapping + * exists at L1 or L2. + */ + if (WARN_ON(RMI_RETURN_INDEX(ret) != level)) + return -EBUSY; + if (WARN_ON(level == RMM_RTT_MAX_LEVEL)) + return -EBUSY; + + /* + * The table has active entries in it, recurse deeper + * and tear down the RTTs. + */ + next_addr = ALIGN(addr + 1, map_size); + ret = realm_tear_down_rtt_level(realm, + level + 1, + addr, + next_addr); + if (ret) + return ret; + /* + * Now that the child RTTs are destroyed, + * retry at this level. + */ + next_addr = addr; + break; + default: + WARN_ON(1); + return -ENXIO; + } + } + + return 0; +} + +static int realm_tear_down_rtt_range(struct realm *realm, + unsigned long start, unsigned long end) +{ + return realm_tear_down_rtt_level(realm, get_start_level(realm) + 1, + start, end); +} + +void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits) +{ + struct realm *realm = &kvm->arch.realm; + + WARN_ON(realm_tear_down_rtt_range(realm, 0, (1UL << ia_bits))); +} + /* Protects access to rme_vmid_bitmap */ static DEFINE_SPINLOCK(rme_vmid_lock); static unsigned long *rme_vmid_bitmap; -- Gitee From 6c8dd0f56f05568b22afe4a2478beb906deaa96a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 20 Sep 2023 19:50:29 +0000 Subject: [PATCH 15/52] KVM: arm64: Add generic check for system-supported vCPU features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.7-rc1 commit ef150908b6bd80a54126dbec324bd63a24a5628a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/r/20230920195036.1169791-2-oliver.upton@linux.dev -------------------------------- [ Upstream commit ef150908b6bd80a54126dbec324bd63a24a5628a ] To date KVM has relied on kvm_reset_vcpu() failing when the vCPU feature flags are unsupported by the system. This is a bit messy since kvm_reset_vcpu() is called at runtime outside of the KVM_ARM_VCPU_INIT ioctl when it is expected to succeed. Further complicating the matter is that kvm_reset_vcpu() must be idempotent with respect to the config_lock, as it isn't consistently called with the lock held. Prepare to move feature compatibility checks out of kvm_reset_vcpu() with a 'generic' check that compares the user-provided flags with a computed maximum feature set for the system.
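Viewed from userspace the change looks roughly like this (a sketch, assuming a host without 32-bit EL1 support; the helper and the error reporting are illustrative, not part of this patch):

	#include <errno.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Hypothetical probe: a feature outside the computed system-wide
	 * feature set is now rejected up front by KVM_ARM_VCPU_INIT. */
	static void probe_aarch32_el1(int vm_fd, int vcpu_fd)
	{
		struct kvm_vcpu_init init = { 0 };

		if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
			return;

		init.features[0] |= 1U << KVM_ARM_VCPU_EL1_32BIT;
		if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init) < 0 && errno == EINVAL)
			fprintf(stderr, "feature not in the system-supported set\n");
	}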
Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20230920195036.1169791-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index dacea66bc6731..c182914f1ee38 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1456,6 +1456,16 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static unsigned long system_supported_vcpu_features(void) +{ + unsigned long features = KVM_VCPU_VALID_FEATURES; + + if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1)) + clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features); + + return features; +} + static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { @@ -1470,12 +1480,12 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, return -ENOENT; } + if (features & ~system_supported_vcpu_features()) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) return 0; - if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) - return -EINVAL; - /* MTE is incompatible with AArch32 */ if (kvm_has_mte(vcpu->kvm)) return -EINVAL; -- Gitee From 4bff3390eac97a94d510812a895523872e5e7f36 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 25 May 2022 15:46:50 +0100 Subject: [PATCH 16/52] arm64: RME: Allocate/free RECs to match vCPUs community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM maintains a data structure known as the Realm Execution Context (or REC). It is similar to struct kvm_vcpu and tracks the state of the virtual CPUs. KVM must delegate memory and request that the structures are created when vCPUs are created, and suitably tear down on destruction. RECs must also be supplied with additional pages - auxiliary (or AUX) granules - for storing the larger register state (e.g. for SVE). The number of AUX granules for a REC depends on the parameters with which the Realm was created - the RMM makes this information available via the RMI_REC_AUX_COUNT call performed after creating the Realm Descriptor (RD). Note that only some of the register state for the REC can be set by KVM, the rest is defined by the RMM (zeroed). The register state then cannot be changed by KVM after the REC is created (except when the guest explicitly requests this e.g. by performing a PSCI call). The RMM also requires that the VMM creates RECs in ascending order of the MPIDR.
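A plausible VMM ordering is sketched below (illustrative only: the helper is hypothetical, and it assumes the vCPU features include KVM_ARM_VCPU_PSCI_0_2 and KVM_ARM_VCPU_REC and that the realm descriptor already exists):

	#include <err.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Hypothetical VMM step: after KVM_ARM_VCPU_INIT, finalizing the
	 * REC feature triggers REC creation; vCPUs are finalized in
	 * ascending MPIDR order to satisfy the RMM's requirement. */
	static void finalize_rec(int vcpu_fd)
	{
		int feature = KVM_ARM_VCPU_REC;

		if (ioctl(vcpu_fd, KVM_ARM_VCPU_FINALIZE, &feature) < 0)
			err(1, "KVM_ARM_VCPU_FINALIZE");
	}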
See Realm Management Monitor specification (DEN0137) for more information: https://developer.arm.com/documentation/den0137/ Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_emulate.h | 7 ++ arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/include/asm/kvm_rme.h | 18 ++++ arch/arm64/kvm/arm.c | 9 ++ arch/arm64/kvm/reset.c | 11 ++ arch/arm64/kvm/rme.c | 144 +++++++++++++++++++++++++++ 6 files changed, 191 insertions(+) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index dce57feeaae56..a01acea63f999 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -662,7 +662,14 @@ static inline bool kvm_realm_is_created(struct kvm *kvm) static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu) { + if (static_branch_unlikely(&kvm_rme_is_available)) + return vcpu_has_feature(vcpu, KVM_ARM_VCPU_REC); return false; } +static inline bool kvm_arm_vcpu_rec_finalized(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.rec.mpidr != INVALID_HWID; +} + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d172503516d86..6b252c9c92860 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -635,6 +635,8 @@ struct kvm_vcpu_arch { /* Per-vcpu CCSIDR override or NULL */ u32 *ccsidr; + /* Realm meta data */ + struct realm_rec rec; #ifdef CONFIG_KVM_HISI_VIRT /* pCPUs this vCPU can be scheduled on. Pure copy of current->cpus_ptr */ cpumask_var_t sched_cpus; diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 32bdedf1d8667..62d4a63d3035f 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -6,6 +6,7 @@ #ifndef __ASM_KVM_RME_H #define __ASM_KVM_RME_H +#include #include /** @@ -65,6 +66,21 @@ struct realm { unsigned int ia_bits; }; +/** + * struct realm_rec - Additional per VCPU data for a Realm + * + * @mpidr: MPIDR (Multiprocessor Affinity Register) value to identify this VCPU + * @rec_page: Kernel VA of the RMM's private page for this REC + * @aux_pages: Additional pages private to the RMM for this REC + * @run: Kernel VA of the RmiRecRun structure shared with the RMM + */ +struct realm_rec { + unsigned long mpidr; + void *rec_page; + struct page *aux_pages[REC_PARAMS_AUX_GRANULES]; + struct rec_run *run; +}; + void kvm_init_rme(void); u32 kvm_realm_ipa_limit(void); @@ -72,6 +88,8 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_init_realm_vm(struct kvm *kvm); void kvm_destroy_realm(struct kvm *kvm); void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); +int kvm_create_rec(struct kvm_vcpu *vcpu); +void kvm_destroy_rec(struct kvm_vcpu *vcpu); #define RMM_RTT_BLOCK_LEVEL 2 #define RMM_RTT_MAX_LEVEL 3 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c182914f1ee38..78511e545f00b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -583,6 +583,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu_clear_flag(vcpu, VCPU_INITIALIZED); bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + vcpu->arch.rec.mpidr = INVALID_HWID; + vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; /* Set up the timer */ @@ -1463,6 +1465,9 @@ static unsigned long system_supported_vcpu_features(void) if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1)) clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features); + if (!static_branch_unlikely(&kvm_rme_is_available)) + clear_bit(KVM_ARM_VCPU_REC, &features); + return 
features; } @@ -1494,6 +1499,10 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features)) return -EINVAL; + /* RME is incompatible with AArch32 */ + if (test_bit(KVM_ARM_VCPU_REC, &features)) + return -EINVAL; + return 0; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index a1830e6393852..f27636690a216 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -139,6 +139,11 @@ int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature) return -EPERM; return kvm_vcpu_finalize_sve(vcpu); + case KVM_ARM_VCPU_REC: + if (!kvm_is_realm(vcpu->kvm) || !vcpu_is_rec(vcpu)) + return -EINVAL; + + return kvm_create_rec(vcpu); } return -EINVAL; @@ -149,6 +154,11 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu)) return false; + if (kvm_is_realm(vcpu->kvm) && + !(vcpu_is_rec(vcpu) && kvm_arm_vcpu_rec_finalized(vcpu) && + READ_ONCE(vcpu->kvm->arch.realm.state) == REALM_STATE_ACTIVE)) + return false; + return true; } @@ -163,6 +173,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); kfree(sve_state); kfree(vcpu->arch.ccsidr); + kvm_destroy_rec(vcpu); #ifdef CONFIG_ARM64_HDBSS if (vcpu->arch.hdbss.br_el2) { diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 567f6b57f66fb..36ad7e5dfa98f 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -426,6 +426,150 @@ void kvm_destroy_realm(struct kvm *kvm) kvm_free_stage2_pgd(&kvm->arch.mmu); } +static void free_rec_aux(struct page **aux_pages, + unsigned int num_aux) +{ + unsigned int i; + + for (i = 0; i < num_aux; i++) + free_delegated_granule(page_to_phys(aux_pages[i])); +} + +static int alloc_rec_aux(struct page **aux_pages, + u64 *aux_phys_pages, + unsigned int num_aux) +{ + int ret; + unsigned int i; + + for (i = 0; i < num_aux; i++) { + struct page *aux_page; + phys_addr_t aux_page_phys; + + aux_page = alloc_page(GFP_KERNEL); + if (!aux_page) { + ret = -ENOMEM; + goto out_err; + } + aux_page_phys = page_to_phys(aux_page); + if (rmi_granule_delegate(aux_page_phys)) { + __free_page(aux_page); + ret = -ENXIO; + goto out_err; + } + aux_pages[i] = aux_page; + aux_phys_pages[i] = aux_page_phys; + } + + return 0; +out_err: + free_rec_aux(aux_pages, i); + return ret; +} + +int kvm_create_rec(struct kvm_vcpu *vcpu) +{ + struct user_pt_regs *vcpu_regs = vcpu_gp_regs(vcpu); + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + struct realm *realm = &vcpu->kvm->arch.realm; + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long rec_page_phys; + struct rec_params *params; + int r, i; + + if (kvm_realm_state(vcpu->kvm) != REALM_STATE_NEW) + return -ENOENT; + + /* + * The RMM will report PSCI v1.0 to Realms and the KVM_ARM_VCPU_PSCI_0_2 + * flag covers v0.2 and onwards. 
+ */ + if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) + return -EINVAL; + + BUILD_BUG_ON(sizeof(*params) > PAGE_SIZE); + BUILD_BUG_ON(sizeof(*rec->run) > PAGE_SIZE); + + params = (struct rec_params *)get_zeroed_page(GFP_KERNEL); + rec->rec_page = (void *)__get_free_page(GFP_KERNEL); + rec->run = (void *)get_zeroed_page(GFP_KERNEL); + if (!params || !rec->rec_page || !rec->run) { + r = -ENOMEM; + goto out_free_pages; + } + + for (i = 0; i < ARRAY_SIZE(params->gprs); i++) + params->gprs[i] = vcpu_regs->regs[i]; + + params->pc = vcpu_regs->pc; + + if (vcpu->vcpu_id == 0) + params->flags |= REC_PARAMS_FLAG_RUNNABLE; + + rec_page_phys = virt_to_phys(rec->rec_page); + + if (rmi_granule_delegate(rec_page_phys)) { + r = -ENXIO; + goto out_free_pages; + } + + r = alloc_rec_aux(rec->aux_pages, params->aux, realm->num_aux); + if (r) + goto out_undelegate_rmm_rec; + + params->num_rec_aux = realm->num_aux; + params->mpidr = mpidr; + + if (rmi_rec_create(virt_to_phys(realm->rd), + rec_page_phys, + virt_to_phys(params))) { + r = -ENXIO; + goto out_free_rec_aux; + } + + rec->mpidr = mpidr; + + free_page((unsigned long)params); + return 0; + +out_free_rec_aux: + free_rec_aux(rec->aux_pages, realm->num_aux); +out_undelegate_rmm_rec: + if (WARN_ON(rmi_granule_undelegate(rec_page_phys))) + rec->rec_page = NULL; +out_free_pages: + free_page((unsigned long)rec->run); + free_page((unsigned long)rec->rec_page); + free_page((unsigned long)params); + return r; +} + +void kvm_destroy_rec(struct kvm_vcpu *vcpu) +{ + struct realm *realm = &vcpu->kvm->arch.realm; + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long rec_page_phys; + + if (!vcpu_is_rec(vcpu)) + return; + + free_page((unsigned long)rec->run); + + rec_page_phys = virt_to_phys(rec->rec_page); + + /* + * The REC and any AUX pages cannot be reclaimed until the REC is + * destroyed. So if the REC destroy fails then the REC page and any AUX + * pages will be leaked. + */ + if (WARN_ON(rmi_rec_destroy(rec_page_phys))) + return; + + free_rec_aux(rec->aux_pages, realm->num_aux); + + free_delegated_granule(rec_page_phys); +} + int kvm_init_realm_vm(struct kvm *kvm) { struct realm_params *params; -- Gitee From 5f0a22e0abb7b3987d9bdad20b9468d949ccab05 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 28 Nov 2024 15:48:22 +0000 Subject: [PATCH 17/52] KVM: arm64: vgic: Provide helper for number of list registers community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Currently the number of list registers available is stored in a global (kvm_vgic_global_state.nr_lr). With Arm CCA the RMM is permitted to reserve list registers for its own use and so the number of available list registers can be fewer for a realm VM. Provide a wrapper function to fetch the global in preparation for restricting nr_lr when dealing with a realm VM. 
Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/vgic/vgic.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 2459b0adea086..07f7e323b0dd0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -21,6 +21,11 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { .gicv3_cpuif = STATIC_KEY_FALSE_INIT, }; +static inline int kvm_vcpu_vgic_nr_lr(struct kvm_vcpu *vcpu) +{ + return kvm_vgic_global_state.nr_lr; +} + /* * Locking order is always: * kvm->lock (mutex) @@ -840,7 +845,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) lockdep_assert_held(&vgic_cpu->ap_list_lock); count = compute_ap_list_depth(vcpu, &multi_sgi); - if (count > kvm_vgic_global_state.nr_lr || multi_sgi) + if (count > kvm_vcpu_vgic_nr_lr(vcpu) || multi_sgi) vgic_sort_ap_list(vcpu); count = 0; @@ -869,7 +874,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) raw_spin_unlock(&irq->irq_lock); - if (count == kvm_vgic_global_state.nr_lr) { + if (count == kvm_vcpu_vgic_nr_lr(vcpu)) { if (!list_is_last(&irq->ap_list, &vgic_cpu->ap_list_head)) vgic_set_underflow(vcpu); @@ -878,7 +883,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) } /* Nuke remaining LRs */ - for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) + for (i = count ; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) vgic_clear_lr(vcpu, i); if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) -- Gitee From 1759766305aaca6f17642e9edb6e9ae911593c9a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Aug 2024 16:25:09 +0100 Subject: [PATCH 18/52] KVM: arm64: Force GICv3 trap activation when no irqchip is configured on VHE mainline inclusion from mainline-v6.12-rc1 commit 8d917e0a8651377321c06513f42e2ab9a86161f4 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/r/20240827152517.3909653-4-maz@kernel.org -------------------------------- [ Upstream commit 8d917e0a8651377321c06513f42e2ab9a86161f4 ] On a VHE system, no GICv3 traps get configured when no irqchip is present. This is not quite matching the "no GICv3" semantics that we want to present. Force such traps to be configured in this case. 
Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240827152517.3909653-4-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/vgic/vgic.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 07f7e323b0dd0..c16b8ead81732 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -976,10 +976,13 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_load(struct kvm_vcpu *vcpu) { - if (unlikely(!vgic_initialized(vcpu->kvm))) + if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) { + if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); return; + } - if (kvm_vgic_global_state.type == VGIC_V2) + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_load(vcpu); else vgic_v3_load(vcpu); @@ -987,10 +990,13 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) void kvm_vgic_put(struct kvm_vcpu *vcpu) { - if (unlikely(!vgic_initialized(vcpu->kvm))) + if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) { + if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); return; + } - if (kvm_vgic_global_state.type == VGIC_V2) + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_put(vcpu); else vgic_v3_put(vcpu); -- Gitee From 8fa7ceb4fd6ed5dfa85786a8b2058b4e635ba617 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 25 May 2022 11:58:47 +0100 Subject: [PATCH 19/52] arm64: RME: Support for the VGIC in realms community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM provides emulation of a VGIC to the realm guest but delegates much of the handling to the host. Implement support in KVM for saving/restoring state to/from the REC structure. 
Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 1 + arch/arm64/kvm/arm.c | 16 +++++++++--- arch/arm64/kvm/rme.c | 5 ++++ arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-v3.c | 5 ++++ arch/arm64/kvm/vgic/vgic.c | 43 ++++++++++++++++++++++++++++++-- 6 files changed, 66 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 62d4a63d3035f..be64b749fcacb 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -83,6 +83,7 @@ struct realm_rec { void kvm_init_rme(void); u32 kvm_realm_ipa_limit(void); +u32 kvm_realm_vgic_nr_lr(void); int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_init_realm_vm(struct kvm *kvm); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 78511e545f00b..6bce4724bdd67 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -708,17 +708,22 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_timer_vcpu_put(vcpu); + kvm_vgic_put(vcpu); + + vcpu->cpu = -1; + + if (vcpu_is_rec(vcpu)) + return; + kvm_arch_vcpu_put_debug_state_flags(vcpu); kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) kvm_vcpu_put_sysregs_vhe(vcpu); - kvm_timer_vcpu_put(vcpu); - kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); kvm_arm_vmid_clear_active(); vcpu_clear_on_unsupported_cpu(vcpu); - vcpu->cpu = -1; kvm_tlbi_dvmbm_vcpu_put(vcpu); @@ -885,6 +890,11 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + if (!irqchip_in_kernel(kvm) && kvm_is_realm(vcpu->kvm)) { + /* Userspace irqchip not yet supported with Realms */ + return -EOPNOTSUPP; + } + mutex_lock(&kvm->arch.config_lock); set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); mutex_unlock(&kvm->arch.config_lock); diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 36ad7e5dfa98f..ef40be4ead73a 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -52,6 +52,11 @@ u32 kvm_realm_ipa_limit(void) return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ); } +u32 kvm_realm_vgic_nr_lr(void) +{ + return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS); +} + static int get_start_level(struct realm *realm) { return 4 - stage2_pgtable_levels(realm->ia_bits); diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 5579ca6b1d809..8b8dfd6e6a2b6 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -98,7 +98,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) * the proper checks already. 
*/ if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && - !kvm_vgic_global_state.can_emulate_gicv2) + (!kvm_vgic_global_state.can_emulate_gicv2 || kvm_is_realm(kvm))) return -ENODEV; /* Must be held to avoid race with vCPU creation */ diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 69ca111e349d9..b45416bdbb656 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -7,9 +7,11 @@ #include #include #include +#include #include #include #include +#include #include "vgic.h" @@ -769,6 +771,9 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + if (vcpu_is_rec(vcpu)) + cpu_if->vgic_vmcr = vcpu->arch.rec.run->exit.gicv3_vmcr; + WARN_ON(vgic_v4_put(vcpu)); vgic_v3_vmcr_sync(vcpu); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index c16b8ead81732..eee76ab9203e5 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -10,7 +10,9 @@ #include #include +#include #include +#include #include "vgic.h" @@ -23,6 +25,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { static inline int kvm_vcpu_vgic_nr_lr(struct kvm_vcpu *vcpu) { + if (unlikely(vcpu_is_rec(vcpu))) + return kvm_realm_vgic_nr_lr(); return kvm_vgic_global_state.nr_lr; } @@ -902,10 +906,23 @@ static inline bool can_access_vgic_from_kernel(void) return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); } +static inline void vgic_rmm_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + int i; + + for (i = 0; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) { + cpu_if->vgic_lr[i] = vcpu->arch.rec.run->exit.gicv3_lrs[i]; + vcpu->arch.rec.run->enter.gicv3_lrs[i] = 0; + } +} + static inline void vgic_save_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); + else if (vcpu_is_rec(vcpu)) + vgic_rmm_save_state(vcpu); else __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); } @@ -932,10 +949,28 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) vgic_prune_ap_list(vcpu); } +static inline void vgic_rmm_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + int i; + + for (i = 0; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) { + vcpu->arch.rec.run->enter.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + /* + * Also populate the rec.run->exit copies so that a late + * decision to back out from entering the realm doesn't cause + * the state to be lost + */ + vcpu->arch.rec.run->exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + } +} + static inline void vgic_restore_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); + else if (vcpu_is_rec(vcpu)) + vgic_rmm_restore_state(vcpu); else __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } @@ -976,7 +1011,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_load(struct kvm_vcpu *vcpu) { - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) { + if (unlikely(!irqchip_in_kernel(vcpu->kvm) || + !vgic_initialized(vcpu->kvm) || + vcpu_is_rec(vcpu))) { if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); return; @@ -990,7 +1027,9 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) void kvm_vgic_put(struct kvm_vcpu *vcpu) { - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) { + if 
(unlikely(!irqchip_in_kernel(vcpu->kvm) || + !vgic_initialized(vcpu->kvm) || + vcpu_is_rec(vcpu))) { if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); return; -- Gitee From 83f870e75d521069911a94f54eaebd295cd8373c Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 26 May 2022 10:44:27 +0100 Subject: [PATCH 20/52] KVM: arm64: Support timers in realm RECs community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM keeps track of the timer while the realm REC is running, but on exit to the normal world KVM is responsible for handling the timers. A later patch adds the support for propagating the timer values from the exit data structure and calling kvm_realm_timers_update(). Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arch_timer.c | 44 +++++++++++++++++++++++++++++++++--- include/kvm/arm_arch_timer.h | 2 ++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 27032290094d6..0605ac4adfb83 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -243,6 +243,13 @@ static inline void cvm_vcpu_put_timer_callback(struct kvm_vcpu *vcpu) static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) { + struct kvm_vcpu *vcpu = ctxt->vcpu; + + if (kvm_is_realm(vcpu->kvm)) { + WARN_ON(offset); + return; + } + #ifdef CONFIG_HISI_VIRTCCA_HOST if (kvm_is_virtcca_cvm(ctxt->vcpu->kvm)) return; @@ -545,6 +552,21 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, } } +void kvm_realm_timers_update(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *arch_timer = &vcpu->arch.timer_cpu; + int i; + + for (i = 0; i < NR_KVM_EL0_TIMERS; i++) { + struct arch_timer_context *timer = &arch_timer->timers[i]; + bool status = timer_get_ctl(timer) & ARCH_TIMER_CTRL_IT_STAT; + bool level = kvm_timer_irq_can_fire(timer) && status; + + if (level != timer->irq.level) + kvm_timer_update_irq(vcpu, level, timer); + } +} + /* Only called for a fully emulated timer */ static void timer_emulate(struct arch_timer_context *ctx) { @@ -960,6 +982,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) if (unlikely(!timer->enabled)) return; + kvm_timer_unblocking(vcpu); + get_timer_map(vcpu, &map); #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS @@ -988,7 +1012,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) if (static_branch_likely(&has_gic_active_state) && map.direct_ptimer) kvm_timer_vcpu_load_gic(map.direct_ptimer); #endif - kvm_timer_unblocking(vcpu); timer_restore_state(map.direct_vtimer); @@ -1201,7 +1224,9 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) ctxt->vcpu = vcpu; - if (timerid == TIMER_VTIMER) + if (kvm_is_realm(vcpu->kvm)) + ctxt->offset.vm_offset = NULL; + else if (timerid == TIMER_VTIMER) ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; else ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; @@ -1224,13 +1249,19 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); + u64 cntvoff; for (int i = 0; i < NR_KVM_TIMERS; i++) timer_context_init(vcpu, i); + if (kvm_is_realm(vcpu->kvm)) + cntvoff = 0; + else + cntvoff = kvm_phys_timer_read(); + /* Synchronize 
offsets across timers of a VM if not already provided */ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { - timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); + timer_set_offset(vcpu_vtimer(vcpu), cntvoff); timer_set_offset(vcpu_ptimer(vcpu), 0); } @@ -1847,6 +1878,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } + /* + * We don't use mapped IRQs for Realms because the RMI doesn't allow + * us setting the LR.HW bit in the VGIC. + */ + if (vcpu_is_rec(vcpu)) + return 0; + #ifdef CONFIG_HISI_VIRTCCA_HOST /* * We don't use mapped IRQs for CVM because the TMI doesn't allow diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 9d3f034bd8855..b527172e8916d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -129,6 +129,8 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +void kvm_realm_timers_update(struct kvm_vcpu *vcpu); + u64 kvm_phys_timer_read(void); void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); -- Gitee From 63f061017d357e50e6eba3544eed3b5d69bba711 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 26 May 2022 15:01:35 +0100 Subject: [PATCH 21/52] arm64: RME: Allow VMM to set RIPAS community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Each page within the protected region of the realm guest can be marked as either RAM or EMPTY. Allow the VMM to control this before the guest has started and provide the equivalent functions to change this (with the guest's approval) at runtime. When transitioning from RIPAS RAM (1) to RIPAS EMPTY (0) the memory is unmapped from the guest and undelegated, allowing the memory to be reused by the host. When transitioning to RIPAS RAM the actual population of the leaf RTTs is done later on stage 2 fault; however, it may be necessary to allocate additional RTTs to allow the RMM to track the RIPAS for the requested range. When freeing a block mapping it is necessary to temporarily unfold the RTT, which requires delegating an extra page to the RMM; this page can then be recovered once the contents of the block mapping have been freed.
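Summarising the state machine above in code form (the numeric values are taken from this commit message; the authoritative macros live in the RMI ABI headers, so treat this as a sketch):

    #define RMI_EMPTY  0   /* not RAM: no data granule may be mapped here */
    #define RMI_RAM    1   /* RAM: data granules are faulted in on demand */

    /*
     * RAM -> EMPTY: unmap the IPA range and undelegate the backing
     *               granules so the host can reuse the memory.
     * EMPTY -> RAM: only ensure the RTTs exist; the data pages arrive
     *               later through the stage 2 fault path.
     */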
Signed-off-by: Steven Price Conflicts: arch/arm64/kvm/mmu.c arch/arm64/kvm/rme.c [zhuangyiwei: keep using unmap_stage2_range instead of kvm_stage2_unmap_range which is modified by nv features] [zhuangyiwei: add may_block parameter to unmap_stage2_range instead of introducing a bunch of nv patches] [zhuangyiwei: let may_block=true for all unmap_stage2_range calls, matching the original unmap_stage2_range default] [zhuangyiwei: use get_page on undelegate failure instead of put_page on undelegate success because of missing guest_memfd] Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 17 ++ arch/arm64/kvm/mmu.c | 16 +- arch/arm64/kvm/rme.c | 411 +++++++++++++++++++++++++++++++ 3 files changed, 438 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index be64b749fcacb..4e7758f0e4b56 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -92,6 +92,15 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); int kvm_create_rec(struct kvm_vcpu *vcpu); void kvm_destroy_rec(struct kvm_vcpu *vcpu); +void kvm_realm_unmap_range(struct kvm *kvm, + unsigned long ipa, + u64 size, + bool unmap_private); +int realm_set_ipa_state(struct kvm_vcpu *vcpu, + unsigned long addr, unsigned long end, + unsigned long ripas, + unsigned long *top_ipa); + #define RMM_RTT_BLOCK_LEVEL 2 #define RMM_RTT_MAX_LEVEL 3 @@ -110,4 +119,12 @@ static inline unsigned long rme_rtt_level_mapsize(int level) return (1UL << RMM_RTT_LEVEL_SHIFT(level)); } +static inline bool realm_is_addr_protected(struct realm *realm, + unsigned long addr) +{ + unsigned int ia_bits = realm->ia_bits; + + return !(addr & ~(BIT(ia_bits - 1) - 1)); +} + #endif diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 5e0f9f3bcf6fd..b5d6b119f839a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -312,6 +312,7 @@ static void invalidate_icache_guest_page(void *va, size_t size) * @start: The intermediate physical base address of the range to unmap * @size: The size of the area to unmap * @may_block: Whether or not we are permitted to block + * @only_shared: If true then protected mappings should not be unmapped * * Clear a range of stage-2 mappings, lowering the various ref-counts. Must * be called while holding mmu_lock (unless for freeing the stage2 pgd before * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs.
*/ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, - bool may_block) + bool may_block, bool only_shared) { struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); phys_addr_t end = start + size; @@ -330,9 +331,10 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 may_block)); } -static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, + bool may_block) { - __unmap_stage2_range(mmu, start, size, true); + __unmap_stage2_range(mmu, start, size, may_block, false); } static void stage2_flush_memslot(struct kvm *kvm, @@ -976,7 +978,8 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); + unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start, + true); } hva = vm_end; } while (hva < reg_end); @@ -1817,7 +1820,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) __unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT, (range->end - range->start) << PAGE_SHIFT, - range->may_block); + range->may_block, + range->only_shared); return false; } @@ -2127,7 +2131,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; write_lock(&kvm->mmu_lock); - unmap_stage2_range(&kvm->arch.mmu, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size, true); write_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index ef40be4ead73a..031e003cea927 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -62,6 +62,51 @@ static int get_start_level(struct realm *realm) return 4 - stage2_pgtable_levels(realm->ia_bits); } +static int find_map_level(struct realm *realm, + unsigned long start, + unsigned long end) +{ + int level = RMM_RTT_MAX_LEVEL; + + while (level > get_start_level(realm)) { + unsigned long map_size = rme_rtt_level_mapsize(level - 1); + + if (!IS_ALIGNED(start, map_size) || + (start + map_size) > end) + break; + + level--; + } + + return level; +} + +static phys_addr_t alloc_delegated_granule(struct kvm_mmu_memory_cache *mc, + gfp_t flags) +{ + phys_addr_t phys = PHYS_ADDR_MAX; + void *virt; + + if (mc) + virt = kvm_mmu_memory_cache_alloc(mc); + else + virt = (void *)__get_free_page(flags); + + if (!virt) + goto out; + + phys = virt_to_phys(virt); + + if (rmi_granule_delegate(phys)) { + free_page((unsigned long)virt); + + phys = PHYS_ADDR_MAX; + } + +out: + return phys; +} + static void free_delegated_granule(phys_addr_t phys) { if (WARN_ON(rmi_granule_undelegate(phys))) { @@ -72,6 +117,132 @@ static void free_delegated_granule(phys_addr_t phys) free_page((unsigned long)phys_to_virt(phys)); } +static int realm_rtt_create(struct realm *realm, + unsigned long addr, + int level, + phys_addr_t phys) +{ + addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); + return rmi_rtt_create(virt_to_phys(realm->rd), phys, addr, level); +} + +static int realm_rtt_fold(struct realm *realm, + unsigned long addr, + int level, + phys_addr_t *rtt_granule) +{ + unsigned long out_rtt; + int ret; + + ret = rmi_rtt_fold(virt_to_phys(realm->rd), addr, level, &out_rtt); + + if (RMI_RETURN_STATUS(ret) == RMI_SUCCESS && rtt_granule) + *rtt_granule = out_rtt; + + return ret; +} + +static int realm_destroy_protected(struct realm *realm, + unsigned long ipa, + unsigned long *next_addr) +{ + 
unsigned long rd = virt_to_phys(realm->rd); + unsigned long addr; + phys_addr_t rtt; + int ret; + +loop: + ret = rmi_data_destroy(rd, ipa, &addr, next_addr); + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { + if (*next_addr > ipa) + return 0; /* UNASSIGNED */ + rtt = alloc_delegated_granule(NULL, GFP_KERNEL); + if (WARN_ON(rtt == PHYS_ADDR_MAX)) + return -1; + /* + * ASSIGNED - ipa is mapped as a block, so split. The index + * from the return code should be 2 otherwise it appears + * there's a huge page bigger than KVM currently supports + */ + WARN_ON(RMI_RETURN_INDEX(ret) != 2); + ret = realm_rtt_create(realm, ipa, 3, rtt); + if (WARN_ON(ret)) { + free_delegated_granule(rtt); + return -1; + } + /* retry */ + goto loop; + } else if (WARN_ON(ret)) { + return -1; + } + ret = rmi_granule_undelegate(addr); + + /* + * If the undelegate fails then something has gone seriously + * wrong: take an extra reference to just leak the page + */ + if (WARN_ON(ret)) + get_page(phys_to_page(addr)); + + return 0; +} + +static void realm_unmap_shared_range(struct kvm *kvm, + int level, + unsigned long start, + unsigned long end) +{ + struct realm *realm = &kvm->arch.realm; + unsigned long rd = virt_to_phys(realm->rd); + ssize_t map_size = rme_rtt_level_mapsize(level); + unsigned long next_addr, addr; + unsigned long shared_bit = BIT(realm->ia_bits - 1); + + if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) + return; + + start |= shared_bit; + end |= shared_bit; + + for (addr = start; addr < end; addr = next_addr) { + unsigned long align_addr = ALIGN(addr, map_size); + int ret; + + next_addr = ALIGN(addr + 1, map_size); + + if (align_addr != addr || next_addr > end) { + /* Need to recurse deeper */ + if (addr < align_addr) + next_addr = align_addr; + realm_unmap_shared_range(kvm, level + 1, addr, + min(next_addr, end)); + continue; + } + + ret = rmi_rtt_unmap_unprotected(rd, addr, level, &next_addr); + switch (RMI_RETURN_STATUS(ret)) { + case RMI_SUCCESS: + break; + case RMI_ERROR_RTT: + if (next_addr == addr) { + /* + * There's a mapping here, but it's not a block + * mapping, so reset next_addr to the next block + * boundary and recurse to clear out the pages + * one level deeper. + */ + next_addr = ALIGN(addr + 1, map_size); + realm_unmap_shared_range(kvm, level + 1, addr, + next_addr); + } + break; + default: + WARN_ON(1); + return; + } + } +} + static int realm_create_rd(struct kvm *kvm) { struct realm *realm = &kvm->arch.realm; @@ -161,6 +332,30 @@ static int realm_rtt_destroy(struct realm *realm, unsigned long addr, return ret; } +static int realm_create_rtt_levels(struct realm *realm, + unsigned long ipa, + int level, + int max_level, + struct kvm_mmu_memory_cache *mc) +{ + if (WARN_ON(level == max_level)) + return 0; + + while (level++ < max_level) { + phys_addr_t rtt = alloc_delegated_granule(mc, GFP_KERNEL); + + if (rtt == PHYS_ADDR_MAX) + return -ENOMEM; + + if (realm_rtt_create(realm, ipa, level, rtt)) { + free_delegated_granule(rtt); + return -ENXIO; + } + } + + return 0; +} + static int realm_tear_down_rtt_level(struct realm *realm, int level, unsigned long start, unsigned long end) { @@ -251,6 +446,61 @@ static int realm_tear_down_rtt_range(struct realm *realm, start, end); } +/* + * Returns 0 on successful fold, a negative value on error, a positive value if + * we were not able to fold all tables at this level. 
+ */ +static int realm_fold_rtt_level(struct realm *realm, int level, + unsigned long start, unsigned long end) +{ + int not_folded = 0; + ssize_t map_size; + unsigned long addr, next_addr; + + if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) + return -EINVAL; + + map_size = rme_rtt_level_mapsize(level - 1); + + for (addr = start; addr < end; addr = next_addr) { + phys_addr_t rtt_granule; + int ret; + unsigned long align_addr = ALIGN(addr, map_size); + + next_addr = ALIGN(addr + 1, map_size); + + ret = realm_rtt_fold(realm, align_addr, level, &rtt_granule); + + switch (RMI_RETURN_STATUS(ret)) { + case RMI_SUCCESS: + free_delegated_granule(rtt_granule); + break; + case RMI_ERROR_RTT: + if (level == RMM_RTT_MAX_LEVEL || + RMI_RETURN_INDEX(ret) < level) { + not_folded++; + break; + } + /* Recurse a level deeper */ + ret = realm_fold_rtt_level(realm, + level + 1, + addr, + next_addr); + if (ret < 0) + return ret; + else if (ret == 0) + /* Try again at this level */ + next_addr = addr; + break; + default: + WARN_ON(1); + return -ENXIO; + } + } + + return not_folded; +} + void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits) { struct realm *realm = &kvm->arch.realm; @@ -258,6 +508,155 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits) WARN_ON(realm_tear_down_rtt_range(realm, 0, (1UL << ia_bits))); } +static void realm_unmap_private_range(struct kvm *kvm, + unsigned long start, + unsigned long end) +{ + struct realm *realm = &kvm->arch.realm; + unsigned long next_addr, addr; + + for (addr = start; addr < end; addr = next_addr) { + int ret; + + ret = realm_destroy_protected(realm, addr, &next_addr); + + if (WARN_ON(ret)) + break; + } + + realm_fold_rtt_level(realm, get_start_level(realm) + 1, + start, end); +} + +void kvm_realm_unmap_range(struct kvm *kvm, unsigned long start, u64 size, + bool unmap_private) +{ + unsigned long end = start + size; + struct realm *realm = &kvm->arch.realm; + + end = min(BIT(realm->ia_bits - 1), end); + + if (realm->state == REALM_STATE_NONE) + return; + + realm_unmap_shared_range(kvm, find_map_level(realm, start, end), + start, end); + if (unmap_private) + realm_unmap_private_range(kvm, start, end); +} + +int realm_set_ipa_state(struct kvm_vcpu *vcpu, + unsigned long start, + unsigned long end, + unsigned long ripas, + unsigned long *top_ipa) +{ + struct kvm *kvm = vcpu->kvm; + struct realm *realm = &kvm->arch.realm; + struct realm_rec *rec = &vcpu->arch.rec; + phys_addr_t rd_phys = virt_to_phys(realm->rd); + phys_addr_t rec_phys = virt_to_phys(rec->rec_page); + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + unsigned long ipa = start; + int ret = 0; + + while (ipa < end) { + unsigned long next; + + ret = rmi_rtt_set_ripas(rd_phys, rec_phys, ipa, end, &next); + + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { + int walk_level = RMI_RETURN_INDEX(ret); + int level = find_map_level(realm, ipa, end); + + /* + * If the RMM walk ended early then more tables are + * needed to reach the required depth to set the RIPAS. 
+ */ + if (walk_level < level) { + ret = realm_create_rtt_levels(realm, ipa, + walk_level, + level, + memcache); + /* Retry with RTTs created */ + if (!ret) + continue; + } else { + ret = -EINVAL; + } + + break; + } else if (RMI_RETURN_STATUS(ret) != RMI_SUCCESS) { + WARN(1, "Unexpected error in %s: %#x\n", __func__, + ret); + ret = -EINVAL; + break; + } + ipa = next; + } + + *top_ipa = ipa; + + if (ripas == RMI_EMPTY && ipa != start) + realm_unmap_private_range(kvm, start, ipa); + + return ret; +} + +static int realm_init_ipa_state(struct realm *realm, + unsigned long ipa, + unsigned long end) +{ + phys_addr_t rd_phys = virt_to_phys(realm->rd); + int ret; + + while (ipa < end) { + unsigned long next; + + ret = rmi_rtt_init_ripas(rd_phys, ipa, end, &next); + + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { + int err_level = RMI_RETURN_INDEX(ret); + int level = find_map_level(realm, ipa, end); + + if (WARN_ON(err_level >= level)) + return -ENXIO; + + ret = realm_create_rtt_levels(realm, ipa, + err_level, + level, NULL); + if (ret) + return ret; + /* Retry with the RTT levels in place */ + continue; + } else if (WARN_ON(ret)) { + return -ENXIO; + } + + ipa = next; + } + + return 0; +} + +static int kvm_init_ipa_range_realm(struct kvm *kvm, + struct kvm_cap_arm_rme_init_ipa_args *args) +{ + gpa_t addr, end; + struct realm *realm = &kvm->arch.realm; + + addr = args->init_ipa_base; + end = addr + args->init_ipa_size; + + if (end < addr) + return -EINVAL; + + if (kvm_realm_state(kvm) != REALM_STATE_NEW) + return -EINVAL; + + return realm_init_ipa_state(realm, addr, end); +} + /* Protects access to rme_vmid_bitmap */ static DEFINE_SPINLOCK(rme_vmid_lock); static unsigned long *rme_vmid_bitmap; @@ -383,6 +782,18 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) case KVM_CAP_ARM_RME_CREATE_RD: r = kvm_create_realm(kvm); break; + case KVM_CAP_ARM_RME_INIT_IPA_REALM: { + struct kvm_cap_arm_rme_init_ipa_args args; + void __user *argp = u64_to_user_ptr(cap->args[1]); + + if (copy_from_user(&args, argp, sizeof(args))) { + r = -EFAULT; + break; + } + + r = kvm_init_ipa_range_realm(kvm, &args); + break; + } default: r = -EINVAL; break; -- Gitee From 5282c9f011d2715262484a28f5952178ab472426 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 26 May 2022 10:58:34 +0100 Subject: [PATCH 22/52] arm64: RME: Handle realm enter/exit community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Entering a realm is done using a SMC call to the RMM. On exit the exit-codes need to be handled slightly differently to the normal KVM path so define our own functions for realm enter/exit and hook them in if the guest is a realm guest. 
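For context, the entry itself boils down to a single conduit call. A minimal sketch of the wrapper, assuming the SMCCC 1.1 based RMI layer used by the rest of the series (SMC_RMI_REC_ENTER is the RMM-defined function ID and is an assumption here; it is not introduced by this patch):

    static inline int rmi_rec_enter(unsigned long rec_phys,
                                    unsigned long run_phys)
    {
        struct arm_smccc_res res;

        /* X1 = REC granule PA, X2 = shared run page PA */
        arm_smccc_1_1_invoke(SMC_RMI_REC_ENTER, rec_phys, run_phys, &res);

        /* RMI status and index are packed into X0 */
        return res.a0;
    }

handle_rec_exit() then unpacks the X0 value with RMI_RETURN_STATUS() and RMI_RETURN_INDEX() before dispatching on run->exit.exit_reason.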
Signed-off-by: Steven Price Conflicts: arch/arm64/kvm/rme-exit.c arch/arm64/kvm/rme.c [zhuangyiwei: rec_exit_ripas_change return guest instead of userspace and is done on next entry] [zhuangyiwei: kvm_mmu_cache_min_pages's input parameter is kvm in OLK-6.6] Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 3 + arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/arm.c | 19 +++- arch/arm64/kvm/rme-exit.c | 163 +++++++++++++++++++++++++++++++ arch/arm64/kvm/rme.c | 48 +++++++++ 5 files changed, 229 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/kvm/rme-exit.c diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 4e7758f0e4b56..0410650cd5456 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -92,6 +92,9 @@ void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); int kvm_create_rec(struct kvm_vcpu *vcpu); void kvm_destroy_rec(struct kvm_vcpu *vcpu); +int kvm_rec_enter(struct kvm_vcpu *vcpu); +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status); + void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, u64 size, diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 3a704db002919..dacae31420058 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o \ - rme.o + rme.o rme-exit.o kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6bce4724bdd67..b471c4476eeb7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1264,7 +1264,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_entry(*vcpu_pc(vcpu)); guest_timing_enter_irqoff(); - ret = kvm_arm_vcpu_enter_exit(vcpu); + if (vcpu_is_rec(vcpu)) + ret = kvm_rec_enter(vcpu); + else + ret = kvm_arm_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -1318,10 +1321,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_enable(); - trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); - /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, ret); + if (!vcpu_is_rec(vcpu)) { + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), + *vcpu_pc(vcpu)); + + handle_exit_early(vcpu, ret); + } preempt_enable(); @@ -1344,7 +1350,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ret = ARM_EXCEPTION_IL; } - ret = handle_exit(vcpu, ret); + if (vcpu_is_rec(vcpu)) + ret = handle_rec_exit(vcpu, ret); + else + ret = handle_exit(vcpu, ret); #ifdef CONFIG_ARCH_VCPU_STAT update_vcpu_stat_time(&vcpu->stat); #endif diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c new file mode 100644 index 0000000000000..a722bbc9d6c49 --- /dev/null +++ b/arch/arm64/kvm/rme-exit.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include + +typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); + +static int rec_exit_reason_notimpl(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + pr_err("[vcpu %d] Unhandled exit reason from realm (ESR: %#llx)\n", + vcpu->vcpu_id, rec->run->exit.esr); + return -ENXIO; +} + +static int rec_exit_sync_dabt(struct kvm_vcpu *vcpu) +{ + return kvm_handle_guest_abort(vcpu); +} + +static int rec_exit_sync_iabt(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", + vcpu->vcpu_id, rec->run->exit.esr); + return -ENXIO; +} + +static int rec_exit_sys_reg(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int rt = kvm_vcpu_sys_get_rt(vcpu); + bool is_write = !(esr & 1); + int ret; + + if (is_write) + vcpu_set_reg(vcpu, rt, rec->run->exit.gprs[0]); + + ret = kvm_handle_sys_reg(vcpu); + + if (ret >= 0 && !is_write) + rec->run->enter.gprs[0] = vcpu_get_reg(vcpu, rt); + + return ret; +} + +static exit_handler_fn rec_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, + [ESR_ELx_EC_SYS64] = rec_exit_sys_reg, + [ESR_ELx_EC_DABT_LOW] = rec_exit_sync_dabt, + [ESR_ELx_EC_IABT_LOW] = rec_exit_sync_iabt +}; + +static int rec_exit_psci(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + int i; + + for (i = 0; i < REC_RUN_GPRS; i++) + vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]); + + return kvm_smccc_call_handler(vcpu); +} + +static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct realm *realm = &kvm->arch.realm; + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long base = rec->run->exit.ripas_base; + unsigned long top = rec->run->exit.ripas_top; + unsigned long ripas = rec->run->exit.ripas_value; + + if (!realm_is_addr_protected(realm, base) || + !realm_is_addr_protected(realm, top - 1)) { + kvm_err("Invalid RIPAS_CHANGE for %#lx - %#lx, ripas: %#lx\n", + base, top, ripas); + return -EINVAL; + } + + return 1; +} + +static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = rec->run->exit.cntv_ctl; + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = rec->run->exit.cntv_cval; + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = rec->run->exit.cntp_ctl; + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = rec->run->exit.cntp_cval; + + kvm_realm_timers_update(vcpu); +} + +/* + * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on + * proper exit to userspace. + */ +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) +{ + struct realm_rec *rec = &vcpu->arch.rec; + u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr); + unsigned long status, index; + + status = RMI_RETURN_STATUS(rec_run_ret); + index = RMI_RETURN_INDEX(rec_run_ret); + + /* + * If a PSCI_SYSTEM_OFF request raced with a vcpu executing, we might + * see the following status code and index indicating an attempt to run + * a REC when the RD state is SYSTEM_OFF. In this case, we just need to + * return to user space which can deal with the system event or will try + * to run the KVM VCPU again, at which point we will no longer attempt + * to enter the Realm because we will have a sleep request pending on + * the VCPU as a result of KVM's PSCI handling. 
+ */ + if (status == RMI_ERROR_REALM && index == 1) { + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; + } + + if (rec_run_ret) + return -ENXIO; + + vcpu->arch.fault.esr_el2 = rec->run->exit.esr; + vcpu->arch.fault.far_el2 = rec->run->exit.far; + vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar; + + update_arch_timer_irq_lines(vcpu); + + /* Reset the emulation flags for the next run of the REC */ + rec->run->enter.flags = 0; + + switch (rec->run->exit.exit_reason) { + case RMI_EXIT_SYNC: + return rec_exit_handlers[esr_ec](vcpu); + case RMI_EXIT_IRQ: + case RMI_EXIT_FIQ: + return 1; + case RMI_EXIT_PSCI: + return rec_exit_psci(vcpu); + case RMI_EXIT_RIPAS_CHANGE: + return rec_exit_ripas_change(vcpu); + } + + kvm_pr_unimpl("Unsupported exit reason: %u\n", + rec->run->exit.exit_reason); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return 0; +} diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 031e003cea927..edf2d7b369bf8 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -842,6 +842,54 @@ void kvm_destroy_realm(struct kvm *kvm) kvm_free_stage2_pgd(&kvm->arch.mmu); } +static void kvm_complete_ripas_change(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct realm_rec *rec = &vcpu->arch.rec; + unsigned long base = rec->run->exit.ripas_base; + unsigned long top = rec->run->exit.ripas_top; + unsigned long ripas = rec->run->exit.ripas_value; + unsigned long top_ipa; + int ret; + + do { + kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_cache, + kvm_mmu_cache_min_pages(kvm)); + write_lock(&kvm->mmu_lock); + ret = realm_set_ipa_state(vcpu, base, top, ripas, &top_ipa); + write_unlock(&kvm->mmu_lock); + + if (WARN(ret && ret != -ENOMEM, + "Unable to satisfy RIPAS_CHANGE for %#lx - %#lx, ripas: %#lx\n", + base, top, ripas)) + break; + + base = top_ipa; + } while (top_ipa < top); +} + +int kvm_rec_enter(struct kvm_vcpu *vcpu) +{ + struct realm_rec *rec = &vcpu->arch.rec; + + switch (rec->run->exit.exit_reason) { + case RMI_EXIT_HOST_CALL: + case RMI_EXIT_PSCI: + for (int i = 0; i < REC_RUN_GPRS; i++) + rec->run->enter.gprs[i] = vcpu_get_reg(vcpu, i); + break; + case RMI_EXIT_RIPAS_CHANGE: + kvm_complete_ripas_change(vcpu); + break; + } + + if (kvm_realm_state(vcpu->kvm) != REALM_STATE_ACTIVE) + return -EINVAL; + + return rmi_rec_enter(virt_to_phys(rec->rec_page), + virt_to_phys(rec->run)); +} + static void free_rec_aux(struct page **aux_pages, unsigned int num_aux) { -- Gitee From 2143f2a88d9d424243d746cc2c2fac10e581a56a Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 26 May 2022 11:01:29 +0100 Subject: [PATCH 23/52] KVM: arm64: Handle realm MMIO emulation community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- MMIO emulation for a realm cannot be done directly with the VM's registers as they are protected from the host. However, for emulatable data aborts, the RMM uses GPRS[0] to provide the read/written value. We can transfer this from/to the equivalent VCPU's register entry and then depend on the generic MMIO handling code in KVM. For an MMIO read, the value is placed in enter.gprs[0] of the shared REC run structure during kvm_handle_mmio_return() rather than in the VCPU's register entry.
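From the VMM's point of view nothing changes: an emulatable data abort from a realm still surfaces as KVM_EXIT_MMIO and is completed the same way. A hedged userspace sketch (emulate_read() stands in for a device model and is hypothetical, not part of any real API):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    uint64_t emulate_read(uint64_t pa, uint32_t len); /* hypothetical */

    void complete_mmio_read(int vcpu_fd, struct kvm_run *run)
    {
        if (run->exit_reason == KVM_EXIT_MMIO && !run->mmio.is_write) {
            uint64_t val = emulate_read(run->mmio.phys_addr,
                                        run->mmio.len);

            /*
             * On the next KVM_RUN, kvm_handle_mmio_return() moves this
             * value into rec.run->enter.gprs[0] and sets
             * REC_ENTER_EMULATED_MMIO instead of advancing the PC.
             */
            memcpy(run->mmio.data, &val, run->mmio.len);
        }
        ioctl(vcpu_fd, KVM_RUN, 0);
    }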
Signed-off-by: Steven Price Conflicts: arch/arm64/kvm/mmio.c [zhuangyiwei: directly inject dabt to realm if invalid dabt syndrome(https://lore.kernel.org/r/20240423150538.2103045-31-tabba@google.com)] Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/inject_fault.c | 4 +++- arch/arm64/kvm/mmio.c | 17 +++++++++++++++-- arch/arm64/kvm/rme-exit.c | 6 ++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 0bd93a5f21ce3..8316737bfcd0d 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -165,7 +165,9 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) */ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { - if (vcpu_el1_is_32bit(vcpu)) + if (unlikely(vcpu_is_rec(vcpu))) + vcpu->arch.rec.run->enter.flags |= REC_ENTER_INJECT_SEA; + else if (vcpu_el1_is_32bit(vcpu)) inject_abt32(vcpu, false, addr); else inject_abt64(vcpu, false, addr); diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 2aa503ff742ee..113f2c9edaec2 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -6,6 +6,7 @@ #include #include +#include #include #include "trace.h" @@ -136,14 +137,21 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); - vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); + + if (vcpu_is_rec(vcpu)) + vcpu->arch.rec.run->enter.gprs[0] = data; + else + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); } /* * The MMIO instruction is emulated and should not be re-executed * in the guest. */ - kvm_incr_pc(vcpu); + if (vcpu_is_rec(vcpu)) + vcpu->arch.rec.run->enter.flags |= REC_ENTER_EMULATED_MMIO; + else + kvm_incr_pc(vcpu); return 1; } @@ -163,6 +171,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) * volunteered to do so, and bail out otherwise. */ if (!kvm_vcpu_dabt_isvalid(vcpu)) { + if (vcpu_is_rec(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, &vcpu->kvm->arch.flags)) { run->exit_reason = KVM_EXIT_ARM_NISV; diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c index a722bbc9d6c49..97374c56910de 100644 --- a/arch/arm64/kvm/rme-exit.c +++ b/arch/arm64/kvm/rme-exit.c @@ -25,6 +25,12 @@ static int rec_exit_reason_notimpl(struct kvm_vcpu *vcpu) static int rec_exit_sync_dabt(struct kvm_vcpu *vcpu) { + struct realm_rec *rec = &vcpu->arch.rec; + + if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), + rec->run->exit.gprs[0]); + return kvm_handle_guest_abort(vcpu); } -- Gitee From 7d289fb7efa7c6c552b23cdd09db25a188327065 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 26 May 2022 16:06:04 +0100 Subject: [PATCH 24/52] arm64: RME: Allow populating initial contents community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The VMM needs to populate the realm with some data before starting (e.g. a kernel and initrd). This is measured by the RMM and used as part of the attestation later on. For now only 4k mappings are supported, future work may add support for larger mappings. 
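For illustration, the VMM side of the new capability might look as below. Two assumptions are flagged: that the sub-command travels in args[0] (the handler in this patch only shows args[1] being read as the user pointer) and that the top-level capability number is KVM_CAP_ARM_RME from the wider series; the argument struct fields match those dereferenced in kvm_populate_realm() in the diff that follows:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int rme_populate(int vm_fd, uint64_t base, uint64_t size)
    {
        struct kvm_cap_arm_rme_populate_realm_args args = {
            .populate_ipa_base = base,   /* must be page aligned */
            .populate_ipa_size = size,   /* must be page aligned */
            .flags = KVM_ARM_RME_POPULATE_FLAGS_MEASURE,
        };
        struct kvm_enable_cap cap = {
            .cap = KVM_CAP_ARM_RME,
            .args = { KVM_CAP_ARM_RME_POPULATE_REALM,
                      (uint64_t)(uintptr_t)&args },
        };

        /* Only valid while the realm is still in the NEW state */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }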
Co-developed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/rme.c | 243 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index edf2d7b369bf8..1f0e8ccaaeb9b 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -545,6 +546,236 @@ void kvm_realm_unmap_range(struct kvm *kvm, unsigned long start, u64 size, realm_unmap_private_range(kvm, start, end); } +static int realm_create_protected_data_page(struct realm *realm, + unsigned long ipa, + kvm_pfn_t dst_pfn, + kvm_pfn_t src_pfn, + unsigned long flags) +{ + phys_addr_t dst_phys, src_phys; + int ret; + + dst_phys = __pfn_to_phys(dst_pfn); + src_phys = __pfn_to_phys(src_pfn); + + if (rmi_granule_delegate(dst_phys)) + return -ENXIO; + + ret = rmi_data_create(virt_to_phys(realm->rd), dst_phys, ipa, src_phys, + flags); + + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { + /* Create missing RTTs and retry */ + int level = RMI_RETURN_INDEX(ret); + + ret = realm_create_rtt_levels(realm, ipa, level, + RMM_RTT_MAX_LEVEL, NULL); + if (ret) + goto err; + + ret = rmi_data_create(virt_to_phys(realm->rd), dst_phys, ipa, + src_phys, flags); + } + + if (!ret) + return 0; + +err: + if (WARN_ON(rmi_granule_undelegate(dst_phys))) { + /* Page can't be returned to NS world so is lost */ + get_page(pfn_to_page(dst_pfn)); + } + return -ENXIO; +} + +static int fold_rtt(struct realm *realm, unsigned long addr, int level) +{ + phys_addr_t rtt_addr; + int ret; + + ret = realm_rtt_fold(realm, addr, level + 1, &rtt_addr); + if (ret) + return ret; + + free_delegated_granule(rtt_addr); + + return 0; +} + +static int populate_par_region(struct kvm *kvm, + phys_addr_t ipa_base, + phys_addr_t ipa_end, + u32 flags) +{ + struct realm *realm = &kvm->arch.realm; + struct kvm_memory_slot *memslot; + gfn_t base_gfn, end_gfn; + int idx; + phys_addr_t ipa; + int ret = 0; + unsigned long data_flags = 0; + + base_gfn = gpa_to_gfn(ipa_base); + end_gfn = gpa_to_gfn(ipa_end); + + if (flags & KVM_ARM_RME_POPULATE_FLAGS_MEASURE) + data_flags = RMI_MEASURE_CONTENT; + + idx = srcu_read_lock(&kvm->srcu); + memslot = gfn_to_memslot(kvm, base_gfn); + if (!memslot) { + ret = -EFAULT; + goto out; + } + + /* We require the region to be contained within a single memslot */ + if (memslot->base_gfn + memslot->npages < end_gfn) { + ret = -EINVAL; + goto out; + } + + if (!kvm_slot_can_be_private(memslot)) { + ret = -EINVAL; + goto out; + } + + write_lock(&kvm->mmu_lock); + + ipa = ipa_base; + while (ipa < ipa_end) { + struct vm_area_struct *vma; + unsigned long map_size; + unsigned int vma_shift; + unsigned long offset; + unsigned long hva; + struct page *page; + bool writeable; + kvm_pfn_t pfn; + int level, i; + + hva = gfn_to_hva_memslot(memslot, gpa_to_gfn(ipa)); + vma = vma_lookup(current->mm, hva); + if (!vma) { + ret = -EFAULT; + break; + } + + /* FIXME: Currently we only support 4k sized mappings */ + vma_shift = PAGE_SHIFT; + + map_size = 1 << vma_shift; + + ipa = ALIGN_DOWN(ipa, map_size); + + switch (map_size) { + case RMM_L2_BLOCK_SIZE: + level = 2; + break; + case PAGE_SIZE: + level = 3; + break; + default: + WARN_ONCE(1, "Unsupported vma_shift %d", vma_shift); + ret = -EFAULT; + break; + } + + pfn = __kvm_faultin_pfn(memslot, gpa_to_gfn(ipa), FOLL_WRITE, + &writeable, &page); + + if (is_error_pfn(pfn)) { + ret = -EFAULT; + break; + } + + if (level < 
RMM_RTT_MAX_LEVEL) { + /* + * A temporary RTT is needed during the map, precreate + * it, however if there is an error (e.g. missing + * parent tables) this will be handled in the + * realm_create_protected_data_page() call. + */ + realm_create_rtt_levels(realm, ipa, level, + RMM_RTT_MAX_LEVEL, NULL); + } + + for (offset = 0, i = 0; offset < map_size && !ret; + offset += PAGE_SIZE, i++) { + phys_addr_t page_ipa = ipa + offset; + kvm_pfn_t priv_pfn; + struct page *gmem_page; + int order; + + ret = kvm_gmem_get_pfn(kvm, memslot, + page_ipa >> PAGE_SHIFT, + &priv_pfn, &gmem_page, &order); + if (ret) + break; + + ret = realm_create_protected_data_page(realm, page_ipa, + priv_pfn, + pfn + i, + data_flags); + } + + kvm_release_faultin_page(kvm, page, false, false); + + if (ret) + break; + + if (level == 2) + fold_rtt(realm, ipa, level); + + ipa += map_size; + } + + write_unlock(&kvm->mmu_lock); + +out: + srcu_read_unlock(&kvm->srcu, idx); + return ret; +} + +static int kvm_populate_realm(struct kvm *kvm, + struct kvm_cap_arm_rme_populate_realm_args *args) +{ + phys_addr_t ipa_base, ipa_end; + + if (kvm_realm_state(kvm) != REALM_STATE_NEW) + return -EINVAL; + + if (!IS_ALIGNED(args->populate_ipa_base, PAGE_SIZE) || + !IS_ALIGNED(args->populate_ipa_size, PAGE_SIZE)) + return -EINVAL; + + if (args->flags & ~RMI_MEASURE_CONTENT) + return -EINVAL; + + ipa_base = args->populate_ipa_base; + ipa_end = ipa_base + args->populate_ipa_size; + + if (ipa_end < ipa_base) + return -EINVAL; + + /* + * Perform the populate in parts to ensure locks are not held for too + * long + */ + while (ipa_base < ipa_end) { + phys_addr_t end = min(ipa_end, ipa_base + SZ_2M); + + int ret = populate_par_region(kvm, ipa_base, end, + args->flags); + + if (ret) + return ret; + + ipa_base = end; + } + + return 0; +} + int realm_set_ipa_state(struct kvm_vcpu *vcpu, unsigned long start, unsigned long end, @@ -794,6 +1025,18 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = kvm_init_ipa_range_realm(kvm, &args); break; } + case KVM_CAP_ARM_RME_POPULATE_REALM: { + struct kvm_cap_arm_rme_populate_realm_args args; + void __user *argp = u64_to_user_ptr(cap->args[1]); + + if (copy_from_user(&args, argp, sizeof(args))) { + r = -EFAULT; + break; + } + + r = kvm_populate_realm(kvm, &args); + break; + } default: r = -EINVAL; break; -- Gitee From 0dc92e72d95d26f6cba1084aec542fe8ca76a9d6 Mon Sep 17 00:00:00 2001 From: Yiwei Zhuang Date: Wed, 2 Apr 2025 19:53:05 +0800 Subject: [PATCH 25/52] rme: populate guest memory region without guest_memfd cca inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- The cca-host-v6 series uses the guest_memfd feature in populate_par_region(), which is not yet supported here. Thus, we implement it in the OLK-6.6 way.
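In outline, the staging scheme implemented in the diff that follows bounces the payload through a scratch page so that the memslot page itself can be delegated (illustration only; note that the "dst" page in the code is the memslot page and the "src" page is the scratch copy):

    /*
     * tmp_page = alloc_page(GFP_KERNEL);    scratch page, never delegated
     * copy_page(tmp_page, memslot_page);    preserve the payload
     * rmi_granule_delegate(memslot_page);   the memslot page becomes the
     *                                       protected data granule
     * rmi_data_create(rd, memslot_page_pa,  the RMM copies (and optionally
     *                 ipa, tmp_page_pa,     measures) the payload from the
     *                 flags);               scratch page into the granule
     */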
Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/rme.c | 66 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 1f0e8ccaaeb9b..257f30f559a97 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -548,15 +548,17 @@ void kvm_realm_unmap_range(struct kvm *kvm, unsigned long start, u64 size, static int realm_create_protected_data_page(struct realm *realm, unsigned long ipa, - kvm_pfn_t dst_pfn, - kvm_pfn_t src_pfn, + struct page *dst_page, + struct page *src_page, unsigned long flags) { phys_addr_t dst_phys, src_phys; int ret; - dst_phys = __pfn_to_phys(dst_pfn); - src_phys = __pfn_to_phys(src_pfn); + copy_page(page_address(src_page), page_address(dst_page)); + + dst_phys = page_to_phys(dst_page); + src_phys = page_to_phys(src_page); if (rmi_granule_delegate(dst_phys)) return -ENXIO; @@ -583,7 +585,7 @@ static int realm_create_protected_data_page(struct realm *realm, err: if (WARN_ON(rmi_granule_undelegate(dst_phys))) { /* Page can't be returned to NS world so is lost */ - get_page(pfn_to_page(dst_pfn)); + get_page(dst_page); } return -ENXIO; } @@ -613,6 +615,7 @@ static int populate_par_region(struct kvm *kvm, int idx; phys_addr_t ipa; int ret = 0; + struct page *tmp_page; unsigned long data_flags = 0; base_gfn = gpa_to_gfn(ipa_base); @@ -634,12 +637,13 @@ static int populate_par_region(struct kvm *kvm, goto out; } - if (!kvm_slot_can_be_private(memslot)) { - ret = -EINVAL; + tmp_page = alloc_page(GFP_KERNEL); + if (!tmp_page) { + ret = -ENOMEM; goto out; } - write_lock(&kvm->mmu_lock); + mmap_read_lock(current->mm); ipa = ipa_base; while (ipa < ipa_end) { @@ -649,9 +653,8 @@ static int populate_par_region(struct kvm *kvm, unsigned long offset; unsigned long hva; struct page *page; - bool writeable; kvm_pfn_t pfn; - int level, i; + int level; hva = gfn_to_hva_memslot(memslot, gpa_to_gfn(ipa)); vma = vma_lookup(current->mm, hva); @@ -680,8 +683,7 @@ static int populate_par_region(struct kvm *kvm, break; } - pfn = __kvm_faultin_pfn(memslot, gpa_to_gfn(ipa), FOLL_WRITE, - &writeable, &page); + pfn = gfn_to_pfn_memslot(memslot, gpa_to_gfn(ipa)); if (is_error_pfn(pfn)) { ret = -EFAULT; @@ -699,38 +701,36 @@ static int populate_par_region(struct kvm *kvm, RMM_RTT_MAX_LEVEL, NULL); } - for (offset = 0, i = 0; offset < map_size && !ret; - offset += PAGE_SIZE, i++) { - phys_addr_t page_ipa = ipa + offset; - kvm_pfn_t priv_pfn; - struct page *gmem_page; - int order; + page = pfn_to_page(pfn); - ret = kvm_gmem_get_pfn(kvm, memslot, - page_ipa >> PAGE_SHIFT, - &priv_pfn, &gmem_page, &order); - if (ret) - break; + for (offset = 0; offset < map_size && !ret; + offset += PAGE_SIZE, page++) { + phys_addr_t page_ipa = ipa + offset; ret = realm_create_protected_data_page(realm, page_ipa, - priv_pfn, - pfn + i, + page, tmp_page, data_flags); } - - kvm_release_faultin_page(kvm, page, false, false); - if (ret) - break; + goto err_release_pfn; - if (level == 2) - fold_rtt(realm, ipa, level); + if (level == 2) { + ret = fold_rtt(realm, ipa, level); + if (ret) + goto err_release_pfn; + } ipa += map_size; + kvm_release_pfn_dirty(pfn); +err_release_pfn: + if (ret) { + kvm_release_pfn_clean(pfn); + break; + } } - write_unlock(&kvm->mmu_lock); - + mmap_read_unlock(current->mm); + __free_page(tmp_page); out: srcu_read_unlock(&kvm->srcu, idx); return ret; -- Gitee From 1a34a66ed77aa9b0b89940fb5bfe2b2d346aa06a Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 26 May 2022 16:29:47 +0100 Subject: [PATCH 
26/52] arm64: RME: Runtime faulting of memory community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- At runtime if the realm guest accesses memory which hasn't yet been mapped then KVM needs to either populate the region or fault the guest. For memory in the lower (protected) region of IPA a fresh page is provided to the RMM which will zero the contents. For memory in the upper (shared) region of IPA, the memory from the memslot is mapped into the realm VM non secure. Signed-off-by: Steven Price Conflicts: arch/arm64/kvm/mmu.c [zhuangyiwei: remove kvm_slot_can_be_private call because there is no guest_memfd] [zhuangyiwei: remove unused private_memslot_fault] [zhuangyiwei: keep using stage2_apply_range_resched instead of kvm_stage2_flush_range which is introduced in nv patches] [zhuangyiwei: revert naming ipa as fault_ipa in user_mem_abort and kvm_handle_guest_abort] [zhuangyiwei: use unmap_stage2_range instead of kvm_stage2_unmap_range which was changed in previous patch] Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_emulate.h | 10 ++ arch/arm64/include/asm/kvm_rme.h | 10 ++ arch/arm64/kvm/mmu.c | 64 +++++++++-- arch/arm64/kvm/rme.c | 156 +++++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index a01acea63f999..4b155fa39d2a6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -660,6 +660,16 @@ static inline bool kvm_realm_is_created(struct kvm *kvm) return kvm_is_realm(kvm) && kvm_realm_state(kvm) != REALM_STATE_NONE; } +static inline gpa_t kvm_gpa_from_fault(struct kvm *kvm, phys_addr_t fault_ipa) +{ + if (kvm_is_realm(kvm)) { + struct realm *realm = &kvm->arch.realm; + + return fault_ipa & ~BIT(realm->ia_bits - 1); + } + return fault_ipa; +} + static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_rme_is_available)) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 0410650cd5456..158f77e24a260 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -99,6 +99,16 @@ void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, u64 size, bool unmap_private); +int realm_map_protected(struct realm *realm, + unsigned long base_ipa, + kvm_pfn_t pfn, + unsigned long size, + struct kvm_mmu_memory_cache *memcache); +int realm_map_non_secure(struct realm *realm, + unsigned long ipa, + kvm_pfn_t pfn, + unsigned long size, + struct kvm_mmu_memory_cache *memcache); int realm_set_ipa_state(struct kvm_vcpu *vcpu, unsigned long addr, unsigned long end, unsigned long ripas, diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index b5d6b119f839a..3b0e7fdf72f05 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -327,8 +327,13 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 lockdep_assert_held_write(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, - may_block)); + + if (kvm_is_realm(kvm)) + kvm_realm_unmap_range(kvm, start, size, !only_shared); + else + WARN_ON(stage2_apply_range(mmu, start, end, + kvm_pgtable_stage2_unmap, + may_block)); } static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, @@ -343,7 
+348,11 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; - stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush); + if (kvm_is_realm(kvm)) + kvm_realm_unmap_range(kvm, addr, end - addr, false); + else + stage2_apply_range_resched(&kvm->arch.mmu, addr, end, + kvm_pgtable_stage2_flush); } /** @@ -998,6 +1007,10 @@ void stage2_unmap_vm(struct kvm *kvm) struct kvm_memory_slot *memslot; int idx, bkt; + /* For realms this is handled by the RMM so nothing to do here */ + if (kvm_is_realm(kvm)) + return; + idx = srcu_read_lock(&kvm->srcu); mmap_read_lock(current->mm); write_lock(&kvm->mmu_lock); @@ -1021,6 +1034,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) if (kvm_is_realm(kvm) && (kvm_realm_state(kvm) != REALM_STATE_DEAD && kvm_realm_state(kvm) != REALM_STATE_NONE)) { + unmap_stage2_range(mmu, 0, (~0ULL) & PAGE_MASK, false); write_unlock(&kvm->mmu_lock); kvm_realm_destroy_rtts(kvm, pgt->ia_bits); @@ -1410,6 +1424,23 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) return vma->vm_flags & VM_MTE_ALLOWED; } +static int realm_map_ipa(struct kvm *kvm, phys_addr_t ipa, + kvm_pfn_t pfn, unsigned long map_size, + enum kvm_pgtable_prot prot, + struct kvm_mmu_memory_cache *memcache) +{ + struct realm *realm = &kvm->arch.realm; + + if (WARN_ON(!(prot & KVM_PGTABLE_PROT_W))) + return -EFAULT; + + if (!realm_is_addr_protected(realm, ipa)) + return realm_map_non_secure(realm, ipa, pfn, map_size, + memcache); + + return realm_map_protected(realm, ipa, pfn, map_size, memcache); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1433,6 +1464,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); + + /* + * Realms cannot map protected pages read-only + * FIXME: It should be possible to map unprotected pages read-only + */ + if (vcpu_is_rec(vcpu)) + write_fault = true; + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); vcpu->stat.mabt_exit_stat++; @@ -1508,6 +1547,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, fault_ipa &= ~(vma_pagesize - 1); gfn = fault_ipa >> PAGE_SHIFT; + gfn = kvm_gpa_from_fault(kvm, fault_ipa) >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); /* Don't use the VMA after the unlock -- it may have vanished */ @@ -1565,7 +1605,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. 
*/ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { + /* FIXME: We shouldn't need to disable this for realms */ + if (vma_pagesize == PAGE_SIZE && !(force_pte || device || kvm_is_realm(kvm))) { if (fault_status == ESR_ELx_FSC_PERM && fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; @@ -1613,6 +1654,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); + else if (kvm_is_realm(kvm)) + ret = realm_map_ipa(kvm, fault_ipa, pfn, vma_pagesize, + prot, memcache); else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, @@ -1736,7 +1780,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) idx = srcu_read_lock(&vcpu->kvm->srcu); - gfn = fault_ipa >> PAGE_SHIFT; + gfn = kvm_gpa_from_fault(vcpu->kvm, fault_ipa) >> PAGE_SHIFT; memslot = gfn_to_memslot(vcpu->kvm, gfn); hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); write_fault = kvm_is_write_fault(vcpu); @@ -1781,7 +1825,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * of the page size. */ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); - ret = io_mem_abort(vcpu, fault_ipa); + ret = io_mem_abort(vcpu, kvm_gpa_from_fault(vcpu->kvm, fault_ipa)); goto out_unlock; } @@ -1867,6 +1911,10 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.mmu.pgt) return false; + /* We don't support aging for Realms */ + if (kvm_is_realm(kvm)) + return true; + return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, true); @@ -1879,6 +1927,10 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (!kvm->arch.mmu.pgt) return false; + /* We don't support aging for Realms */ + if (kvm_is_realm(kvm)) + return true; + return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, false); diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 257f30f559a97..c44975a12f5c9 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -604,6 +604,162 @@ static int fold_rtt(struct realm *realm, unsigned long addr, int level) return 0; } +int realm_map_protected(struct realm *realm, + unsigned long ipa, + kvm_pfn_t pfn, + unsigned long map_size, + struct kvm_mmu_memory_cache *memcache) +{ + phys_addr_t phys = __pfn_to_phys(pfn); + phys_addr_t rd = virt_to_phys(realm->rd); + unsigned long base_ipa = ipa; + unsigned long size; + int map_level; + int ret = 0; + + if (WARN_ON(!IS_ALIGNED(ipa, map_size))) + return -EINVAL; + + switch (map_size) { + case PAGE_SIZE: + map_level = 3; + break; + case RMM_L2_BLOCK_SIZE: + map_level = 2; + break; + default: + return -EINVAL; + } + + if (map_level < RMM_RTT_MAX_LEVEL) { + /* + * A temporary RTT is needed during the map, precreate it, + * however if there is an error (e.g. missing parent tables) + * this will be handled below. + */ + realm_create_rtt_levels(realm, ipa, map_level, + RMM_RTT_MAX_LEVEL, memcache); + } + + for (size = 0; size < map_size; size += PAGE_SIZE) { + if (rmi_granule_delegate(phys)) { + /* + * It's likely we raced with another VCPU on the same + * fault. Assume the other VCPU has handled the fault + * and return to the guest. 
+ */ + return 0; + } + + ret = rmi_data_create_unknown(rd, phys, ipa); + + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { + /* Create missing RTTs and retry */ + int level = RMI_RETURN_INDEX(ret); + + ret = realm_create_rtt_levels(realm, ipa, level, + RMM_RTT_MAX_LEVEL, + memcache); + WARN_ON(ret); + if (ret) + goto err_undelegate; + + ret = rmi_data_create_unknown(rd, phys, ipa); + } + WARN_ON(ret); + + if (ret) + goto err_undelegate; + + phys += PAGE_SIZE; + ipa += PAGE_SIZE; + } + + if (map_size == RMM_L2_BLOCK_SIZE) + ret = fold_rtt(realm, base_ipa, map_level); + if (WARN_ON(ret)) + goto err; + + return 0; + +err_undelegate: + if (WARN_ON(rmi_granule_undelegate(phys))) { + /* Page can't be returned to NS world so is lost */ + get_page(phys_to_page(phys)); + } +err: + while (size > 0) { + unsigned long data, top; + + phys -= PAGE_SIZE; + size -= PAGE_SIZE; + ipa -= PAGE_SIZE; + + WARN_ON(rmi_data_destroy(rd, ipa, &data, &top)); + + if (WARN_ON(rmi_granule_undelegate(phys))) { + /* Page can't be returned to NS world so is lost */ + get_page(phys_to_page(phys)); + } + } + return -ENXIO; +} + +int realm_map_non_secure(struct realm *realm, + unsigned long ipa, + kvm_pfn_t pfn, + unsigned long map_size, + struct kvm_mmu_memory_cache *memcache) +{ + phys_addr_t rd = virt_to_phys(realm->rd); + int map_level; + int ret = 0; + unsigned long desc = __pfn_to_phys(pfn) | + PTE_S2_MEMATTR(MT_S2_FWB_NORMAL) | + /* FIXME: Read+Write permissions for now */ + (3 << 6); + + if (WARN_ON(!IS_ALIGNED(ipa, map_size))) + return -EINVAL; + + switch (map_size) { + case PAGE_SIZE: + map_level = 3; + break; + case RMM_L2_BLOCK_SIZE: + map_level = 2; + break; + default: + return -EINVAL; + } + + ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc); + + if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { + /* Create missing RTTs and retry */ + int level = RMI_RETURN_INDEX(ret); + + ret = realm_create_rtt_levels(realm, ipa, level, map_level, + memcache); + if (WARN_ON(ret)) + return -ENXIO; + + ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc); + } + /* + * RMI_ERROR_RTT can be reported for two reasons: either the RTT tables + * are not there, or there is an RTTE already present for the address. + * The call to realm_create_rtt_levels() above handles the first case, + * and in the second case this indicates that another thread has + * already populated the RTTE for us, so we can ignore the error and + * continue. + */ + if (WARN_ON(ret && RMI_RETURN_STATUS(ret) != RMI_ERROR_RTT)) + return -ENXIO; + + return 0; +} + static int populate_par_region(struct kvm *kvm, phys_addr_t ipa_base, phys_addr_t ipa_end, -- Gitee From 89b8f5b8b204e1a4d6da83b5d666e65caea6b95d Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 20 Jun 2022 11:24:00 +0100 Subject: [PATCH 27/52] KVM: arm64: Handle realm VCPU load community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- When loading a realm VCPU much of the work is handled by the RMM so only some of the actions are required. Rearrange kvm_arch_vcpu_load() slightly so we can bail out early for a realm guest. 
Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b471c4476eeb7..febe4f865ec55 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -678,10 +678,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); - if (has_vhe()) - kvm_vcpu_load_sysregs_vhe(vcpu); - kvm_arch_vcpu_load_fp(vcpu); - kvm_vcpu_pmu_restore_guest(vcpu); if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); @@ -695,6 +691,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) #endif if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); + + /* No additional state needs to be loaded on Realmed VMs */ + if (vcpu_is_rec(vcpu)) + return; + + if (has_vhe()) + kvm_vcpu_load_sysregs_vhe(vcpu); + kvm_arch_vcpu_load_fp(vcpu); + kvm_vcpu_pmu_restore_guest(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) -- Gitee From 425d65164c70cc2d8b347b5d5c2783e49ded16ea Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 20 Jun 2022 11:26:06 +0100 Subject: [PATCH 28/52] KVM: arm64: Validate register access for a Realm VM community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM only allows setting the GPRS (x0-x30) and PC for a realm guest. Check this in kvm_arm_set_reg() so that the VMM can receive a suitable error return if other registers are accessed. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/guest.c | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d3161a683838d..818727090e3c2 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -111,6 +111,24 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static bool kvm_realm_validate_core_reg(u64 off) +{ + /* + * Note that GPRs can only sometimes be controlled by the VMM. + * For PSCI only X0-X6 are used, higher registers are ignored (restored + * from the REC). + * For HOST_CALL all of X0-X30 are copied to the RsiHostCall structure. + * For emulated MMIO X0 is always used. + */ + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.pc): + return true; + } + return false; +} + static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) { int size; @@ -153,6 +171,9 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off)) return -EINVAL; + if (kvm_is_realm(vcpu->kvm) && !kvm_realm_validate_core_reg(off)) + return -EPERM; + return size; } @@ -818,12 +839,34 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return kvm_arm_sys_reg_get_reg(vcpu, reg); } +/* + * The RMI ABI only enables setting some GPRs and PC. The selection of GPRs + * that are available depends on the Realm state and the reason for the last + * exit. 
All other registers are reset to architectural or otherwise defined + * reset values by the RMM, except for a few configuration fields that + * correspond to Realm parameters. + */ +static bool validate_realm_set_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) { + u64 off = core_reg_offset_from_id(reg->id); + + return kvm_realm_validate_core_reg(off); + } + + return false; +} + int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* We currently use nothing arch-specific in upper 32 bits */ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) return -EINVAL; + if (kvm_is_realm(vcpu->kvm) && !validate_realm_set_reg(vcpu, reg)) + return -EINVAL; + switch (reg->id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); case KVM_REG_ARM_FW: -- Gitee From e7ad70781d793aab4b10b9b1059ab51e9d93c20a Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 20 Jun 2022 11:29:38 +0100 Subject: [PATCH 29/52] KVM: arm64: Handle Realm PSCI requests community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM needs to be informed of the target REC when a PSCI call is made with an MPIDR argument. Expose an ioctl to the userspace in case the PSCI is handled by it. Co-developed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 3 +++ arch/arm64/kvm/arm.c | 25 +++++++++++++++++++++++++ arch/arm64/kvm/psci.c | 29 +++++++++++++++++++++++++++++ arch/arm64/kvm/rme.c | 15 +++++++++++++++ 4 files changed, 72 insertions(+) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 158f77e24a260..90a4537ad38da 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -113,6 +113,9 @@ int realm_set_ipa_state(struct kvm_vcpu *vcpu, unsigned long addr, unsigned long end, unsigned long ripas, unsigned long *top_ipa); +int realm_psci_complete(struct kvm_vcpu *calling, + struct kvm_vcpu *target, + unsigned long status); #define RMM_RTT_BLOCK_LEVEL 2 #define RMM_RTT_MAX_LEVEL 3 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index febe4f865ec55..da4d330eae6d8 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1715,6 +1715,22 @@ static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, return __kvm_arm_vcpu_set_events(vcpu, events); } +static int kvm_arm_vcpu_rmm_psci_complete(struct kvm_vcpu *vcpu, + struct kvm_arm_rmm_psci_complete *arg) +{ + struct kvm_vcpu *target = kvm_mpidr_to_vcpu(vcpu->kvm, arg->target_mpidr); + + if (!target) + return -EINVAL; + + /* + * RMM v1.0 only supports PSCI_RET_SUCCESS or PSCI_RET_DENIED + * for the status. But, let us leave it to the RMM to filter + * for making this future proof. 
+ */ + return realm_psci_complete(vcpu, target, arg->psci_status); +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1837,6 +1853,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return kvm_arm_vcpu_finalize(vcpu, what); } + case KVM_ARM_VCPU_RMM_PSCI_COMPLETE: { + struct kvm_arm_rmm_psci_complete req; + + if (!kvm_is_realm(vcpu->kvm)) + return -EINVAL; + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + return kvm_arm_vcpu_rmm_psci_complete(vcpu, &req); + } default: r = -EINVAL; } diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 1f69b667332b2..f9abab5d50d74 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -103,6 +103,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) reset_state->reset = true; kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); + /* + * Make sure we issue PSCI_COMPLETE before the VCPU can be + * scheduled. + */ + if (vcpu_is_rec(vcpu)) + realm_psci_complete(source_vcpu, vcpu, PSCI_RET_SUCCESS); /* * Make sure the reset request is observed if the RUNNABLE mp_state is @@ -115,6 +121,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) out_unlock: spin_unlock(&vcpu->arch.mp_state_lock); + if (vcpu_is_rec(vcpu) && ret != PSCI_RET_SUCCESS) + realm_psci_complete(source_vcpu, vcpu, + ret == PSCI_RET_ALREADY_ON ? + PSCI_RET_SUCCESS : PSCI_RET_DENIED); return ret; } @@ -142,6 +152,25 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) /* Ignore other bits of target affinity */ target_affinity &= target_affinity_mask; + if (vcpu_is_rec(vcpu)) { + struct kvm_vcpu *target_vcpu; + + /* RMM supports only zero affinity level */ + if (lowest_affinity_level != 0) + return PSCI_RET_INVALID_PARAMS; + + target_vcpu = kvm_mpidr_to_vcpu(kvm, target_affinity); + if (!target_vcpu) + return PSCI_RET_INVALID_PARAMS; + + /* + * Provide the references of running and target RECs to the RMM + * so that the RMM can complete the PSCI request. + */ + realm_psci_complete(vcpu, target_vcpu, PSCI_RET_SUCCESS); + return PSCI_RET_SUCCESS; + } + /* * If one or more VCPU matching target affinity are running * then ON else OFF diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index c44975a12f5c9..6df58bab2ae72 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -118,6 +118,21 @@ static void free_delegated_granule(phys_addr_t phys) free_page((unsigned long)phys_to_virt(phys)); } +int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, + unsigned long status) +{ + int ret; + + ret = rmi_psci_complete(virt_to_phys(calling->arch.rec.rec_page), + virt_to_phys(target->arch.rec.rec_page), + status); + + if (ret) + return -EINVAL; + + return 0; +} + static int realm_rtt_create(struct realm *realm, unsigned long addr, int level, -- Gitee From 23f13378de4c951250531bc3aac746f5727ce69d Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 20 Jun 2022 11:49:56 +0100 Subject: [PATCH 30/52] KVM: arm64: WARN on injected undef exceptions community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM doesn't allow injection of an undefined exception into a realm guest. Add a WARN to catch if this ever happens.
Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/inject_fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 8316737bfcd0d..1542b2605113c 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -226,6 +226,8 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { + if (vcpu_is_rec(vcpu)) + WARN(1, "Cannot inject undefined exception into REC. Continuing with unknown behaviour"); if (vcpu_el1_is_32bit(vcpu)) inject_undef32(vcpu); else -- Gitee From 10fd0dba7e0fa2826f2b171c052c62363cd1830b Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 15 Aug 2022 15:43:25 +0100 Subject: [PATCH 31/52] arm64: Don't expose stolen time for realm guests community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- It doesn't make much sense as a realm guest wouldn't want to trust the host. It will also need some extra work to ensure that KVM will only attempt to write into a shared memory region. So for now just disable it. Reviewed-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index da4d330eae6d8..e4016dc12e403 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -476,7 +476,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = system_supports_mte(); break; case KVM_CAP_STEAL_TIME: - r = kvm_arm_pvtime_supported(); + if (kvm_is_realm(kvm)) + r = 0; + else + r = kvm_arm_pvtime_supported(); break; case KVM_CAP_ARM_EL1_32BIT: r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); -- Gitee From 9438a5776c82311e42fefcc278ffbb68f030e45a Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 4 Oct 2022 11:10:03 +0100 Subject: [PATCH 32/52] arm64: rme: allow userspace to inject aborts community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Extend KVM_SET_VCPU_EVENTS to support realms, where KVM cannot set the system registers, and the RMM must perform it on next REC entry. Signed-off-by: Joey Gouly Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- Documentation/virt/kvm/api.rst | 2 ++ arch/arm64/kvm/guest.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6aaac32abd399..8010187db38bd 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1271,6 +1271,8 @@ User space may need to inject several types of events to the guest. Set the pending SError exception state for this VCPU. It is not possible to 'cancel' an Serror that has been made pending. +User space cannot inject SErrors into Realms. 
+ If the guest performed an access to I/O memory which could not be handled by userspace, for example because of missing instruction syndrome decode information or because there is no device mapped at the accessed IPA, then diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 818727090e3c2..7fe28755ce21e 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -918,6 +918,30 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending; + if (vcpu_is_rec(vcpu)) { + /* Cannot inject SError into a Realm. */ + if (serror_pending) + return -EINVAL; + + /* + * If a data abort is pending, set the flag and let the RMM + * inject an SEA when the REC is scheduled to be run. + */ + if (ext_dabt_pending) { + /* + * Can only inject SEA into a Realm if the previous exit + * was due to a data abort of an Unprotected IPA. + */ + if (!(vcpu->arch.rec.run->enter.flags & REC_ENTER_EMULATED_MMIO)) + return -EINVAL; + + vcpu->arch.rec.run->enter.flags &= ~REC_ENTER_EMULATED_MMIO; + vcpu->arch.rec.run->enter.flags |= REC_ENTER_INJECT_SEA; + } + + return 0; + } + if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) return -EINVAL; -- Gitee From 9513d464cfaa1caebe2afec4878deec565423d48 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 24 Aug 2022 18:40:46 +0100 Subject: [PATCH 33/52] arm64: rme: support RSI_HOST_CALL community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Forward RSI_HOST_CALLS to KVM's HVC handler. Signed-off-by: Joey Gouly Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/rme-exit.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c index 97374c56910de..033f63449487a 100644 --- a/arch/arm64/kvm/rme-exit.c +++ b/arch/arm64/kvm/rme-exit.c @@ -99,6 +99,26 @@ static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) return 1; } +static int rec_exit_host_call(struct kvm_vcpu *vcpu) +{ + int ret, i; + struct realm_rec *rec = &vcpu->arch.rec; + + vcpu->stat.hvc_exit_stat++; + + for (i = 0; i < REC_RUN_GPRS; i++) + vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]); + + ret = kvm_smccc_call_handler(vcpu); + + if (ret < 0) { + vcpu_set_reg(vcpu, 0, ~0UL); + ret = 1; + } + + return ret; +} + static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) { struct realm_rec *rec = &vcpu->arch.rec; @@ -160,6 +180,8 @@ int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) return rec_exit_psci(vcpu); case RMI_EXIT_RIPAS_CHANGE: return rec_exit_ripas_change(vcpu); + case RMI_EXIT_HOST_CALL: + return rec_exit_host_call(vcpu); } kvm_pr_unimpl("Unsupported exit reason: %u\n", -- Gitee From 29a782ca58f1f9ec21565a1542c5523764d06730 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 10 Oct 2022 23:53:24 +0100 Subject: [PATCH 34/52] arm64: rme: Allow checking SVE on VM instance community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Given we have different types of VMs supported, check the support for SVE for the given instance of the VM to accurately report the status. 
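As an illustration (a sketch only, not part of the patch), the VM-scoped answer can be obtained by issuing KVM_CHECK_EXTENSION against the VM file descriptor rather than against /dev/kvm; 'vm_fd' below is a placeholder for an already-created VM and the snippet assumes <sys/ioctl.h>, <err.h> and <linux/kvm.h>:

    /*
     * For a realm VM this now reflects the RMM's SVE support; for a
     * normal VM it still reflects the host's system_supports_sve().
     */
    int have_sve = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE);

    if (have_sve < 0)
        err(1, "KVM_CHECK_EXTENSION(KVM_CAP_ARM_SVE)");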
Signed-off-by: Suzuki K Poulose Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 2 ++ arch/arm64/kvm/arm.c | 5 ++++- arch/arm64/kvm/rme.c | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 90a4537ad38da..0d89ab1645c12 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -85,6 +85,8 @@ void kvm_init_rme(void); u32 kvm_realm_ipa_limit(void); u32 kvm_realm_vgic_nr_lr(void); +bool kvm_rme_supports_sve(void); + int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_init_realm_vm(struct kvm *kvm); void kvm_destroy_realm(struct kvm *kvm); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e4016dc12e403..fbafd1d18acda 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -500,7 +500,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = get_kvm_ipa_limit(); break; case KVM_CAP_ARM_SVE: - r = system_supports_sve(); + if (kvm_is_realm(kvm)) + r = kvm_rme_supports_sve(); + else + r = system_supports_sve(); break; case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 6df58bab2ae72..127882e59a041 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -20,6 +20,11 @@ static bool rme_supports(unsigned long feature) return !!u64_get_bits(rmm_feat_reg0, feature); } +bool kvm_rme_supports_sve(void) +{ + return rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN); +} + static int rmi_check_version(void) { struct arm_smccc_res res; -- Gitee From 9f0b4453dd0a1d8e81f3ed787138a2225b67048d Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 21 Oct 2022 09:35:02 +0100 Subject: [PATCH 35/52] arm64: RME: Always use 4k pages for realms community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Always split up huge pages to avoid problems managing huge pages. There are two issues currently: 1. The uABI for the VMM allows populating memory on 4k boundaries even if the underlying allocator (e.g. hugetlbfs) is using a larger page size. Using a memfd for private allocations will push this issue onto the VMM as it will need to respect the granularity of the allocator. 2. The guest is able to request arbitrary ranges to be remapped as shared. Again with a memfd approach it will be up to the VMM to deal with the complexity and either overmap (need the huge mapping and add an additional 'overlapping' shared mapping) or reject the request as invalid due to the use of a huge page allocator. For now just break everything down to 4k pages in the RMM controlled stage 2. 
Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3b0e7fdf72f05..adf0e2a76c9d7 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1514,6 +1514,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; + } else if (kvm_is_realm(kvm)) { + // Force PTE level mappings for realms + force_pte = true; + vma_shift = PAGE_SHIFT; } else { vma_shift = get_vma_page_shift(vma, hva); } -- Gitee From 55cf1b71f5b952b4ff837531161bea5bbe8ef2c2 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 22 May 2023 09:27:39 +0100 Subject: [PATCH 36/52] arm64: rme: Prevent Device mappings for Realms community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Physical device assignment is not yet supported by the RMM, so it doesn't make much sense to allow device mappings within the realm. Prevent them when the guest is a realm. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/mmu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index adf0e2a76c9d7..1e053f06edcc3 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1108,6 +1108,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (is_protected_kvm_enabled()) return -EPERM; + /* We don't support mapping special pages into a Realm */ + if (kvm_is_realm(kvm)) + return -EINVAL; + size += offset_in_page(guest_ipa); guest_ipa &= PAGE_MASK; @@ -1600,6 +1604,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; + /* + * Don't allow device accesses to protected memory as we don't (yet) + * support protected devices. + */ + if (device && kvm_is_realm(kvm) && + kvm_gpa_from_fault(kvm, fault_ipa) == fault_ipa) + return -EINVAL; + read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) -- Gitee From 027c840f58361be65b785bbf680fb110c8e83c65 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 6 Apr 2023 15:28:07 +0100 Subject: [PATCH 37/52] arm_pmu: Provide a mechanism for disabling the physical IRQ community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Arm CCA assigns the physical PMU device to the guest running in realm world, however the IRQs are routed via the host. To enter a realm guest while a PMU IRQ is pending it is necessary to block the physical IRQ to prevent an immediate exit. Provide a mechanism in the PMU driver for KVM to control the physical IRQ. 
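For reference, a condensed sketch of the call pattern this enables around realm guest entry (the full hook-up into kvm_arch_vcpu_ioctl_run() lands in a following patch of this series):

    bool pmu_stopped = false;

    /*
     * Before entering the realm guest: mask the physical PMU IRQ on
     * this CPU if the virtual PMU interrupt is pending, so the PMU
     * state restored by the RMM cannot trigger an immediate exit.
     */
    if (vcpu->arch.pmu.irq_level) {
        pmu_stopped = true;
        arm_pmu_set_phys_irq(false);
    }

    /* ... enter the guest and handle the exit ... */

    if (pmu_stopped)
        arm_pmu_set_phys_irq(true);  /* unmask again */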
Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- drivers/perf/arm_pmu.c | 15 +++++++++++++++ include/linux/perf/arm_pmu.h | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 227cf80895bea..247b038ff4d9e 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -770,6 +770,21 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) return 0; } +void arm_pmu_set_phys_irq(bool enable) +{ + int cpu = get_cpu(); + struct arm_pmu *pmu = per_cpu(cpu_armpmu, cpu); + int irq; + + irq = armpmu_get_cpu_irq(pmu, cpu); + if (irq && !enable) + per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq); + else if (irq && enable) + per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq); + + put_cpu(); +} + #ifdef CONFIG_CPU_PM static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) { diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 0bbb5094b87b6..475c5cdf13c8c 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -209,6 +209,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu); #endif bool arm_pmu_irq_is_nmi(void); +void arm_pmu_set_phys_irq(bool enable); /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); @@ -219,6 +220,10 @@ void armpmu_free_irq(int irq, int cpu); #define ARMV8_PMU_PDEV_NAME "armv8-pmu" +#else /* CONFIG_ARM_PMU */ + +static inline void arm_pmu_set_phys_irq(bool enable) {} + #endif /* CONFIG_ARM_PMU */ #if defined(CONFIG_ARM_PMU) -- Gitee From 24de674a012029c6e2993a75e468c474416014d1 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 3 Aug 2023 11:22:48 +0100 Subject: [PATCH 38/52] arm64: rme: Enable PMU support with a realm guest community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://gitlab.arm.com/linux-arm/linux-cca/-/commit/b903cc15e7fccf8ed0ecb47928513538a6b670e3 -------------------------------- Use the PMU registers from the RmiRecExit structure to identify when an overflow interrupt is due and inject it into the guest. Also hook up the configuration option for enabling the PMU within the guest. When entering a realm guest with a PMU interrupt pending, it is necessary to disable the physical interrupt. Otherwise when the RMM restores the PMU state the physical interrupt will trigger causing an immediate exit back to the host. The guest is expected to acknowledge the interrupt causing a host exit (to update the GIC state) which gives the opportunity to re-enable the physical interrupt before the next PMU event. The existing PMU option (KVM_ARM_VCPU_PMU_V3_CTRL) is a per-VCPU option, but the RMM interface requires the PMU configuration to be set at the VM level. So in kvm_create_rec() we enforce that the per-VCPU configuration matches that configured for the realm. 
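To illustrate the VMM side, a hedged sketch (not part of the patch): the outer capability name KVM_CAP_ARM_RME and the marshalling, with the action selector in args[0] and the item pointer in args[1], are assumed from the upstream CCA proposal; 'vm_fd' is a placeholder:

    struct kvm_cap_arm_rme_config_item pmu_cfg = {
        .cfg = KVM_CAP_ARM_RME_CFG_PMU,
        .num_pmu_cntrs = 6,    /* example value, capped by the RMM */
    };
    struct kvm_enable_cap cap = {
        .cap  = KVM_CAP_ARM_RME,
        .args = { KVM_CAP_ARM_RME_CONFIG_REALM, (__u64)&pmu_cfg },
    };

    /* Must be issued before the realm descriptor is created. */
    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
        err(1, "KVM_CAP_ARM_RME_CFG_PMU");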
Signed-off-by: Steven Price Conflicts: cca-host-v6 -> cca-host-eac5 [zhuangyiwei: set pmcr value by ioctl KVM_CAP_ARM_RME_CFG_PMU] Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 6 ++++++ arch/arm64/kvm/arm.c | 15 +++++++++++++++ arch/arm64/kvm/pmu-emul.c | 3 +++ arch/arm64/kvm/rme.c | 25 +++++++++++++++++++++++++ 5 files changed, 51 insertions(+) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 0d89ab1645c12..73d8f4813ff20 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -54,6 +54,7 @@ enum realm_state { * @num_aux: The number of auxiliary pages required by the RMM * @vmid: VMID to be used by the RMM for the realm * @ia_bits: Number of valid Input Address bits in the IPA + * @pmu_enabled: PMU enabled in the realm */ struct realm { enum realm_state state; @@ -64,6 +65,7 @@ struct realm { unsigned long num_aux; unsigned int vmid; unsigned int ia_bits; + bool pmu_enabled; }; /** diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index bd3033ab139df..719ac2cc25bf2 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -431,6 +431,7 @@ enum { /* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ #define KVM_CAP_ARM_RME_CFG_RPV 0 #define KVM_CAP_ARM_RME_CFG_HASH_ALGO 1 +#define KVM_CAP_ARM_RME_CFG_PMU 4 struct kvm_cap_arm_rme_config_item { __u32 cfg; @@ -445,6 +446,11 @@ struct kvm_cap_arm_rme_config_item { __u32 hash_algo; }; + /* cfg == KVM_CAP_ARM_RME_CFG_PMU */ + struct { + __u32 num_pmu_cntrs; + }; + /* Fix the size of the union */ __u8 reserved[256]; }; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fbafd1d18acda..4da5311e1bb31 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1211,6 +1212,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; run->flags = 0; while (ret > 0) { + bool pmu_stopped = false; + /* * Check conditions before entering the guest */ @@ -1239,6 +1242,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_pmu_flush_hwstate(vcpu); + if (vcpu_is_rec(vcpu)) { + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + if (pmu->irq_level) { + pmu_stopped = true; + arm_pmu_set_phys_irq(false); + } + } + local_irq_disable(); kvm_vgic_flush_hwstate(vcpu); @@ -1343,6 +1355,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) preempt_enable(); + if (pmu_stopped) + arm_pmu_set_phys_irq(true); + /* * The ARMv8 architecture doesn't give the hypervisor * a mechanism to prevent a guest from dropping to AArch32 EL0 diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 20e7b97376cb7..4b0403d52bfec 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -340,6 +340,9 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; + if (vcpu_is_rec(vcpu)) + return vcpu->arch.rec.run->exit.pmu_ovf_status; + if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 127882e59a041..a19befd163694 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -1149,6 +1149,25 @@ static int config_realm_hash_algo(struct realm *realm, return 0; } +static int config_realm_pmu(struct realm *realm, + struct kvm_cap_arm_rme_config_item
*cfg) +{ + int pmu_max_ctrs = u64_get_bits(rmm_feat_reg0, + RMI_FEATURE_REGISTER_0_PMU_NUM_CTRS); + + if (!rme_supports(RMI_FEATURE_REGISTER_0_PMU_EN)) + return -EINVAL; + + if (cfg->num_pmu_cntrs > pmu_max_ctrs) + return -EINVAL; + + realm->params->pmu_num_ctrs = cfg->num_pmu_cntrs; + realm->params->flags |= RMI_REALM_PARAM_FLAG_PMU; + + realm->pmu_enabled = true; + return 0; +} + static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) { struct kvm_cap_arm_rme_config_item cfg; @@ -1168,6 +1187,9 @@ static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) case KVM_CAP_ARM_RME_CFG_HASH_ALGO: r = config_realm_hash_algo(realm, &cfg); break; + case KVM_CAP_ARM_RME_CFG_PMU: + r = config_realm_pmu(realm, &cfg); + break; default: r = -EINVAL; } @@ -1370,6 +1392,9 @@ int kvm_create_rec(struct kvm_vcpu *vcpu) if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) return -EINVAL; + if (kvm_vcpu_has_pmu(vcpu) != realm->pmu_enabled) + return -EINVAL; + BUILD_BUG_ON(sizeof(*params) > PAGE_SIZE); BUILD_BUG_ON(sizeof(*rec->run) > PAGE_SIZE); -- Gitee From e022e47cab3334da3425520651416fc7a1340d87 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 5 May 2023 16:04:16 +0100 Subject: [PATCH 39/52] kvm: rme: Hide KVM_CAP_READONLY_MEM for realm guests community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- For protected memory read only isn't supported. While it may be possible to support read only for unprotected memory, this isn't supported at the present time. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4da5311e1bb31..9061241bb14ae 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -421,7 +421,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: - case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_VCPU_EVENTS: @@ -435,6 +434,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_COUNTER_OFFSET: r = 1; break; + case KVM_CAP_READONLY_MEM: case KVM_CAP_SET_GUEST_DEBUG: r = !kvm_is_realm(kvm); break; -- Gitee From cf4a10abf6b07a3772dc98341c64aba8e73b3c92 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 4 Dec 2023 14:48:12 +0000 Subject: [PATCH 40/52] arm64: RME: Propagate number of breakpoints and watchpoints to userspace community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- The RMM describes the maximum number of BPs/WPs available to the guest in the Feature Register 0. Propagate those numbers into ID_AA64DFR0_EL1, which is visible to userspace. A VMM needs this information in order to set up realm parameters. 
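For the VMM, a minimal sketch of recovering those limits from the ID register ('vcpu_fd' is a placeholder; per the Arm ARM, BRPs is bits [15:12] and WRPs bits [23:20] of ID_AA64DFR0_EL1, each encoded as count minus one):

    __u64 dfr0;
    struct kvm_one_reg reg = {
        .id   = ARM64_SYS_REG(3, 0, 0, 5, 0),  /* ID_AA64DFR0_EL1 */
        .addr = (__u64)&dfr0,
    };

    if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
        err(1, "KVM_GET_ONE_REG(ID_AA64DFR0_EL1)");

    unsigned int num_brps = ((dfr0 >> 12) & 0xf) + 1;
    unsigned int num_wrps = ((dfr0 >> 20) & 0xf) + 1;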
Signed-off-by: Jean-Philippe Brucker Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 2 ++ arch/arm64/kvm/rme.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 73d8f4813ff20..51b0aad8bbedb 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -87,6 +87,8 @@ void kvm_init_rme(void); u32 kvm_realm_ipa_limit(void); u32 kvm_realm_vgic_nr_lr(void); +u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); + bool kvm_rme_supports_sve(void); int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index a19befd163694..ae00d1cf91f1b 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -63,6 +63,28 @@ u32 kvm_realm_vgic_nr_lr(void) return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS); } +u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) +{ + u32 bps = u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_BPS); + u32 wps = u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_WPS); + u32 ctx_cmps; + + if (!kvm_is_realm(vcpu->kvm)) + return val; + + /* Ensure CTX_CMPs is still valid */ + ctx_cmps = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, val); + ctx_cmps = min(bps, ctx_cmps); + + val &= ~(ID_AA64DFR0_EL1_BRPs_MASK | ID_AA64DFR0_EL1_WRPs_MASK | + ID_AA64DFR0_EL1_CTX_CMPs); + val |= FIELD_PREP(ID_AA64DFR0_EL1_BRPs_MASK, bps) | + FIELD_PREP(ID_AA64DFR0_EL1_WRPs_MASK, wps) | + FIELD_PREP(ID_AA64DFR0_EL1_CTX_CMPs, ctx_cmps); + + return val; +} + static int get_start_level(struct realm *realm) { return 4 - stage2_pgtable_levels(realm->ia_bits); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6a3dddaddd94d..505424082c65d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1536,7 +1536,7 @@ static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, /* Hide BRBE from guests */ val &= ~ID_AA64DFR0_EL1_BRBE_MASK; - return val; + return kvm_realm_reset_id_aa64dfr0_el1(vcpu, val); } static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, -- Gitee From c46b26609bd383a6f079960cd73c8ceb0d1dd960 Mon Sep 17 00:00:00 2001 From: Yiwei Zhuang Date: Thu, 3 Apr 2025 11:33:12 +0800 Subject: [PATCH 41/52] arm64: RME: Set breakpoint and watchpoint cca inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- Allow userspace to configure the number of breakpoints and watchpoints of a Realm VM through ioctl. 
Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_rme.h | 2 ++ arch/arm64/include/uapi/asm/kvm.h | 7 +++++++ arch/arm64/kvm/rme.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 51b0aad8bbedb..c773dda34ce5a 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -86,6 +86,8 @@ struct realm_rec { void kvm_init_rme(void); u32 kvm_realm_ipa_limit(void); u32 kvm_realm_vgic_nr_lr(void); +u32 kvm_realm_get_num_brps(void); +u32 kvm_realm_get_num_wrps(void); u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 719ac2cc25bf2..a984062e9892f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -431,6 +431,7 @@ enum { /* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ #define KVM_CAP_ARM_RME_CFG_RPV 0 #define KVM_CAP_ARM_RME_CFG_HASH_ALGO 1 +#define KVM_CAP_ARM_RME_CFG_DBG 3 #define KVM_CAP_ARM_RME_CFG_PMU 4 struct kvm_cap_arm_rme_config_item { @@ -446,6 +447,12 @@ struct kvm_cap_arm_rme_config_item { __u32 hash_algo; }; + /* cfg == KVM_CAP_ARM_RME_CFG_DBG */ + struct { + __u32 num_brps; + __u32 num_wrps; + }; + /* cfg == KVM_CAP_ARM_RME_CFG_PMU */ struct { __u32 num_pmu_cntrs; diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index ae00d1cf91f1b..00953426dc8e2 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -63,6 +63,16 @@ u32 kvm_realm_vgic_nr_lr(void) return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS); } +u32 kvm_realm_get_num_brps(void) +{ + return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_BPS); +} + +u32 kvm_realm_get_num_wrps(void) +{ + return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_WPS); +} + u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { u32 bps = u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_BPS); @@ -1190,6 +1200,21 @@ static int config_realm_pmu(struct realm *realm, return 0; } +static int config_realm_debug(struct realm *realm, + struct kvm_cap_arm_rme_config_item *cfg) +{ + if (cfg->num_brps > kvm_realm_get_num_brps()) + return -EINVAL; + + if (cfg->num_wrps > kvm_realm_get_num_wrps()) + return -EINVAL; + + realm->params->num_bps = cfg->num_brps; + realm->params->num_wps = cfg->num_wrps; + + return 0; +} + static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) { struct kvm_cap_arm_rme_config_item cfg; @@ -1212,6 +1237,9 @@ static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) case KVM_CAP_ARM_RME_CFG_PMU: r = config_realm_pmu(realm, &cfg); break; + case KVM_CAP_ARM_RME_CFG_DBG: + r = config_realm_debug(realm, &cfg); + break; default: r = -EINVAL; } -- Gitee From c2710d0094c338ec12e0abe8519d93bd78843329 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 8 Dec 2023 09:04:29 +0000 Subject: [PATCH 42/52] arm64: RME: Propagate max SVE vector length from RMM community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- RMM provides the maximum vector length it supports for a guest in its feature register. Make it visible to the rest of KVM and to userspace via KVM_REG_ARM64_SVE_VLS. 
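A VMM can then recover the limit along these lines (a sketch; 'vcpu_fd' is a placeholder, and the register is a bitmap with one bit per supported vector quantum, where vq = VL / 16 bytes):

    __u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
    struct kvm_one_reg reg = {
        .id   = KVM_REG_ARM64_SVE_VLS,
        .addr = (__u64)vqs,
    };
    unsigned int vq, max_vl = 0;

    if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
        err(1, "KVM_GET_ONE_REG(SVE_VLS)");

    for (vq = 1; vq <= 64 * KVM_ARM64_SVE_VLS_WORDS; vq++)
        if (vqs[(vq - 1) / 64] & (1ULL << ((vq - 1) % 64)))
            max_vl = 16 * vq;  /* vector length in bytes */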
Signed-off-by: Jean-Philippe Brucker Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_rme.h | 1 + arch/arm64/kvm/guest.c | 2 +- arch/arm64/kvm/reset.c | 12 ++++++++++-- arch/arm64/kvm/rme.c | 6 ++++++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6b252c9c92860..30b1244b33568 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -78,8 +78,8 @@ enum kvm_mode kvm_get_mode(void); static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; #endif -extern unsigned int __ro_after_init kvm_sve_max_vl; int __init kvm_arm_init_sve(void); +unsigned int kvm_sve_get_max_vl(struct kvm *kvm); u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index c773dda34ce5a..513708916e074 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -88,6 +88,7 @@ u32 kvm_realm_ipa_limit(void); u32 kvm_realm_vgic_nr_lr(void); u32 kvm_realm_get_num_brps(void); u32 kvm_realm_get_num_wrps(void); +unsigned int kvm_realm_sve_max_vl(void); u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 7fe28755ce21e..69bb19c63cf92 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -415,7 +415,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (vq_present(vqs, vq)) max_vq = vq; - if (max_vq > sve_vq_from_vl(kvm_sve_max_vl)) + if (max_vq > sve_vq_from_vl(kvm_sve_get_max_vl(vcpu->kvm))) return -EINVAL; /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f27636690a216..cd1b39690378e 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -45,7 +45,7 @@ static u32 __ro_after_init kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) -unsigned int __ro_after_init kvm_sve_max_vl; +static unsigned int __ro_after_init kvm_sve_max_vl; int __init kvm_arm_init_sve(void) { @@ -73,12 +73,20 @@ int __init kvm_arm_init_sve(void) return 0; } +unsigned int kvm_sve_get_max_vl(struct kvm *kvm) +{ + if (kvm_is_realm(kvm)) + return kvm_realm_sve_max_vl(); + else + return kvm_sve_max_vl; +} + static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) { if (!system_supports_sve()) return -EINVAL; - vcpu->arch.sve_max_vl = kvm_sve_max_vl; + vcpu->arch.sve_max_vl = kvm_sve_get_max_vl(vcpu->kvm); /* * Userspace can still customize the vector lengths by writing diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 00953426dc8e2..08ea8351a4d0a 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -73,6 +73,12 @@ u32 kvm_realm_get_num_wrps(void) return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_WPS); } +unsigned int kvm_realm_sve_max_vl(void) +{ + return sve_vl_from_vq(u64_get_bits(rmm_feat_reg0, + RMI_FEATURE_REGISTER_0_SVE_VL) + 1); +} + u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) { u32 bps = u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_BPS); -- Gitee From f9f2e2d0e4029ee9932d515f27684b3b9736dabf Mon Sep 17 00:00:00 2001 From: Yiwei Zhuang Date: Thu, 3 Apr 2025 12:04:51 +0800 Subject: [PATCH 43/52] arm64: RME: Configure max SVE vector length for a Realm cca inclusion category: feature bugzilla: 
https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- Allow userspace to configure max SVE vector length for a Realm. Signed-off-by: Yiwei Zhuang --- arch/arm64/include/uapi/asm/kvm.h | 6 ++++++ arch/arm64/kvm/rme.c | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index a984062e9892f..050b29651b6c3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -431,6 +431,7 @@ enum { /* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ #define KVM_CAP_ARM_RME_CFG_RPV 0 #define KVM_CAP_ARM_RME_CFG_HASH_ALGO 1 +#define KVM_CAP_ARM_RME_CFG_SVE 2 #define KVM_CAP_ARM_RME_CFG_DBG 3 #define KVM_CAP_ARM_RME_CFG_PMU 4 @@ -447,6 +448,11 @@ struct kvm_cap_arm_rme_config_item { __u32 hash_algo; }; + /* cfg == KVM_CAP_ARM_RME_CFG_SVE */ + struct { + __u32 sve_vq; + }; + /* cfg == KVM_CAP_ARM_RME_CFG_DBG */ struct { __u32 num_brps; diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 08ea8351a4d0a..86b283ac22f85 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -1187,6 +1187,24 @@ static int config_realm_hash_algo(struct realm *realm, return 0; } +static int config_realm_sve(struct realm *realm, + struct kvm_cap_arm_rme_config_item *cfg) +{ + int max_sve_vq = u64_get_bits(rmm_feat_reg0, + RMI_FEATURE_REGISTER_0_SVE_VL); + + if (!kvm_rme_supports_sve()) + return -EINVAL; + + if (cfg->sve_vq > max_sve_vq) + return -EINVAL; + + realm->params->sve_vl = cfg->sve_vq; + realm->params->flags |= RMI_REALM_PARAM_FLAG_SVE; + + return 0; +} + static int config_realm_pmu(struct realm *realm, struct kvm_cap_arm_rme_config_item *cfg) { @@ -1240,6 +1258,9 @@ static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) case KVM_CAP_ARM_RME_CFG_HASH_ALGO: r = config_realm_hash_algo(realm, &cfg); break; + case KVM_CAP_ARM_RME_CFG_SVE: + r = config_realm_sve(realm, &cfg); + break; case KVM_CAP_ARM_RME_CFG_PMU: r = config_realm_pmu(realm, &cfg); break; -- Gitee From 62c7af87c97cdb924be2d5c1a78af76199ebcd14 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 4 Jan 2024 12:33:36 +0000 Subject: [PATCH 44/52] arm64: RME: Provide register list for unfinalized RME RECs community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- KVM_GET_REG_LIST should not be called before SVE is finalized. The ioctl handler currently returns -EPERM in this case. But because it uses kvm_arm_vcpu_is_finalized(), it now also rejects the call for unfinalized REC even though finalizing the REC can only be done late, after Realm descriptor creation. Move the check to copy_sve_reg_indices(). One adverse side effect of this change is that a KVM_GET_REG_LIST call that only probes for the array size will now succeed even if SVE is not finalized, but that seems harmless since the following KVM_GET_REG_LIST with the full array will fail.
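For reference, the two-call pattern the message describes (a sketch with error handling trimmed; 'vcpu_fd' is a placeholder and calloc() needs <stdlib.h>):

    struct kvm_reg_list probe = { .n = 0 };
    struct kvm_reg_list *list;

    /*
     * The first call fails with E2BIG but fills in the required count;
     * after this patch that probe works even before SVE is finalized.
     */
    ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe);

    list = calloc(1, sizeof(*list) + probe.n * sizeof(__u64));
    list->n = probe.n;
    /* The full fetch still fails (-EPERM) while SVE is unfinalized. */
    if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list) < 0)
        err(1, "KVM_GET_REG_LIST");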
Signed-off-by: Jean-Philippe Brucker Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/arm.c | 4 ---- arch/arm64/kvm/guest.c | 9 +++------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9061241bb14ae..c998286123ca0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1806,10 +1806,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (unlikely(!kvm_vcpu_initialized(vcpu))) break; - r = -EPERM; - if (!kvm_arm_vcpu_is_finalized(vcpu)) - break; - r = -EFAULT; if (copy_from_user(®_list, user_list, sizeof(reg_list))) break; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 69bb19c63cf92..3a247bb5298ce 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -712,12 +712,9 @@ static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) { const unsigned int slices = vcpu_sve_slices(vcpu); - if (!vcpu_has_sve(vcpu)) + if (!vcpu_has_sve(vcpu) || !kvm_arm_vcpu_sve_finalized(vcpu)) return 0; - /* Policed by KVM_GET_REG_LIST: */ - WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); - return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */) + 1; /* KVM_REG_ARM64_SVE_VLS */ } @@ -733,8 +730,8 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, if (!vcpu_has_sve(vcpu)) return 0; - /* Policed by KVM_GET_REG_LIST: */ - WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); + if (!kvm_arm_vcpu_sve_finalized(vcpu)) + return -EPERM; /* * Enumerate this first, so that userspace can save/restore in -- Gitee From cb802e55021f4d1560b4d593a9bd6940b18f1bb0 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 4 Jan 2024 17:46:05 +0000 Subject: [PATCH 45/52] arm64: RME: Provide accurate register list community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Userspace can set a few registers with KVM_SET_ONE_REG (9 GP registers at runtime, and 3 system registers during initialization). Update the register list returned by KVM_GET_REG_LIST. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/guest.c | 19 ++++++++---- arch/arm64/kvm/hypercalls.c | 4 +-- arch/arm64/kvm/sys_regs.c | 58 ++++++++++++++++++++++++++++--------- 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3a247bb5298ce..09f2a5761943b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -659,8 +659,6 @@ static const u64 timer_reg_list[] = { KVM_REG_ARM_PTIMER_CVAL, }; -#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) - static bool is_timer_reg(u64 index) { switch (index) { @@ -675,9 +673,14 @@ static bool is_timer_reg(u64 index) return false; } +static unsigned long num_timer_regs(struct kvm_vcpu *vcpu) +{ + return kvm_is_realm(vcpu->kvm) ? 
0 : ARRAY_SIZE(timer_reg_list); +} + static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - for (int i = 0; i < NUM_TIMER_REGS; i++) { + for (int i = 0; i < num_timer_regs(vcpu); i++) { if (put_user(timer_reg_list[i], uindices)) return -EFAULT; uindices++; @@ -715,6 +718,9 @@ static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) if (!vcpu_has_sve(vcpu) || !kvm_arm_vcpu_sve_finalized(vcpu)) return 0; + if (kvm_is_realm(vcpu->kvm)) + return 1; /* KVM_REG_ARM64_SVE_VLS */ + return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */) + 1; /* KVM_REG_ARM64_SVE_VLS */ } @@ -742,6 +748,9 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, return -EFAULT; ++num_regs; + if (kvm_is_realm(vcpu->kvm)) + return num_regs; + for (i = 0; i < slices; i++) { for (n = 0; n < SVE_NUM_ZREGS; n++) { reg = KVM_REG_ARM64_SVE_ZREG(n, i); @@ -779,7 +788,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) res += num_sve_regs(vcpu); res += kvm_arm_num_sys_reg_descs(vcpu); res += kvm_arm_get_fw_num_regs(vcpu); - res += NUM_TIMER_REGS; + res += num_timer_regs(vcpu); return res; } @@ -811,7 +820,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) ret = copy_timer_indices(vcpu, uindices); if (ret < 0) return ret; - uindices += NUM_TIMER_REGS; + uindices += num_timer_regs(vcpu); return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 4c9fc5df5142b..94cf44e7b93f1 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -418,14 +418,14 @@ void kvm_arm_teardown_hypercalls(struct kvm *kvm) int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { - return ARRAY_SIZE(kvm_arm_fw_reg_ids); + return kvm_is_realm(vcpu->kvm) ? 0 : ARRAY_SIZE(kvm_arm_fw_reg_ids); } int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { int i; - for (i = 0; i < ARRAY_SIZE(kvm_arm_fw_reg_ids); i++) { + for (i = 0; i < kvm_arm_get_fw_num_regs(vcpu); i++) { if (put_user(kvm_arm_fw_reg_ids[i], uindices++)) return -EFAULT; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 505424082c65d..5ab097e7d3abd 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3600,18 +3600,18 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); } -static unsigned int num_demux_regs(void) +static unsigned int num_demux_regs(struct kvm_vcpu *vcpu) { - return CSSELR_MAX; + return kvm_is_realm(vcpu->kvm) ? 0 : CSSELR_MAX; } -static int write_demux_regids(u64 __user *uindices) +static int write_demux_regids(struct kvm_vcpu *vcpu, u64 __user *uindices) { u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; unsigned int i; val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; - for (i = 0; i < CSSELR_MAX; i++) { + for (i = 0; i < num_demux_regs(vcpu); i++) { if (put_user(val | i, uindices)) return -EFAULT; uindices++; @@ -3619,6 +3619,23 @@ static int write_demux_regids(u64 __user *uindices) return 0; } +static unsigned int num_invariant_regs(struct kvm_vcpu *vcpu) +{ + return kvm_is_realm(vcpu->kvm) ? 
0 : ARRAY_SIZE(invariant_sys_regs); +} + +static int write_invariant_regids(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + + for (i = 0; i < num_invariant_regs(vcpu); i++) { + if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) + return -EFAULT; + uindices++; + } + return 0; +} + static u64 sys_reg_to_index(const struct sys_reg_desc *reg) { return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | @@ -3642,11 +3659,27 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) return true; } +static bool kvm_realm_sys_reg_hidden_user(const struct kvm_vcpu *vcpu, u64 reg) +{ + if (!kvm_is_realm(vcpu->kvm)) + return false; + + switch (reg) { + case SYS_ID_AA64DFR0_EL1: + case SYS_PMCR_EL0: + return false; + } + return true; +} + static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 __user **uind, unsigned int *total) { + if (kvm_realm_sys_reg_hidden_user(vcpu, reg_to_encoding(rd))) + return 0; + /* * Ignore registers we trap but don't save, * and for which no custom user accessor is provided. @@ -3684,29 +3717,26 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) { - return ARRAY_SIZE(invariant_sys_regs) - + num_demux_regs() + return num_invariant_regs(vcpu) + + num_demux_regs(vcpu) + walk_sys_regs(vcpu, (u64 __user *)NULL); } int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - unsigned int i; int err; - /* Then give them all the invariant registers' indices. */ - for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { - if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) - return -EFAULT; - uindices++; - } + err = write_invariant_regids(vcpu, uindices); + if (err) + return err; + uindices += num_invariant_regs(vcpu); err = walk_sys_regs(vcpu, uindices); if (err < 0) return err; uindices += err; - return write_demux_regids(uindices); + return write_demux_regids(vcpu, uindices); } int __init kvm_sys_reg_table_init(void) -- Gitee From 7f3e40f942459f269a5e3c852ddff469beca4ab1 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 9 Dec 2024 16:46:03 +0000 Subject: [PATCH 46/52] KVM: arm64: Expose KVM_ARM_VCPU_REC to user space community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Increment KVM_VCPU_MAX_FEATURES to expose the new capability to user space. *NOTE*: This also exposes KVM_ARM_VCPU_HAS_EL2 (as it is one less than KVM_ARM_VCPU_REC) - so this currently depends on nested virt being 'finished' before merging. 
See below for discussion: https://lore.kernel.org/r/a7011738-a084-46fa-947f-395d90b37f8b%40arm.com Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 30b1244b33568..093684c70b1fd 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -42,7 +42,7 @@ #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS -#define KVM_VCPU_MAX_FEATURES 7 +#define KVM_VCPU_MAX_FEATURES 9 #define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1) #define KVM_REQ_SLEEP \ -- Gitee From 7ba443f776ea1a5148e1b10bd14bf615a650b692 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 20 Jun 2022 11:54:11 +0100 Subject: [PATCH 47/52] KVM: arm64: Allow activating realms community inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N Reference: https://lore.kernel.org/kvm/yq5amsgsrzva.fsf@kernel.org/T/#meb4cb12031992f1275fe7f5b40ac38c1e883ac87 -------------------------------- Add the ioctl to activate a realm and set the static branch to enable access to the realm functionality if the RMM is detected. Signed-off-by: Steven Price Signed-off-by: Yiwei Zhuang --- arch/arm64/kvm/rme.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 86b283ac22f85..1ae33e15181a7 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -1102,6 +1102,20 @@ static int kvm_init_ipa_range_realm(struct kvm *kvm, return realm_init_ipa_state(realm, addr, end); } +static int kvm_activate_realm(struct kvm *kvm) +{ + struct realm *realm = &kvm->arch.realm; + + if (kvm_realm_state(kvm) != REALM_STATE_NEW) + return -EINVAL; + + if (rmi_realm_activate(virt_to_phys(realm->rd))) + return -ENXIO; + + WRITE_ONCE(realm->state, REALM_STATE_ACTIVE); + return 0; +} + /* Protects access to rme_vmid_bitmap */ static DEFINE_SPINLOCK(rme_vmid_lock); static unsigned long *rme_vmid_bitmap; @@ -1312,6 +1326,9 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = kvm_populate_realm(kvm, &args); break; } + case KVM_CAP_ARM_RME_ACTIVATE_REALM: + r = kvm_activate_realm(kvm); + break; default: r = -EINVAL; break; @@ -1583,5 +1600,5 @@ void kvm_init_rme(void) if (rme_vmid_init()) return; - /* Future patch will enable static branch kvm_rme_is_available */ + static_branch_enable(&kvm_rme_is_available); } -- Gitee From 1e9f867db58bfe3b8211c2321da6f4ef2b8a13a7 Mon Sep 17 00:00:00 2001 From: yxk Date: Mon, 7 Apr 2025 19:02:22 +0000 Subject: [PATCH 48/52] Add CCA base operations. virtcca inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- Add common code for CCA and VirtCCA. 
We refactor VirtCCA to reuse CCA code where possible, differentiated by `cca_cvm_type`. Signed-off-by: hjx_gitff Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/cca_base.h | 62 +++++++++++ arch/arm64/include/asm/cca_type.h | 14 +++ arch/arm64/include/asm/virtcca_cvm_host.h | 8 +- arch/arm64/kernel/virtcca_cvm_host.c | 4 +- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/cca_base.c | 123 ++++++++++++++++++++++ 6 files changed, 209 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/include/asm/cca_base.h create mode 100644 arch/arm64/include/asm/cca_type.h create mode 100644 arch/arm64/kvm/cca_base.c diff --git a/arch/arm64/include/asm/cca_base.h b/arch/arm64/include/asm/cca_base.h new file mode 100644 index 0000000000000..685240d6abe38 --- /dev/null +++ b/arch/arm64/include/asm/cca_base.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. + */ +#ifndef __CCA_BASE_H +#define __CCA_BASE_H + +#include +#include +#include + +#include +#include +#include +#include +#include + +struct cca_operations { + int (*enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap); + int (*init_realm_vm)(struct kvm *kvm); + int (*realm_vm_enter)(struct kvm_vcpu *vcpu); + int (*realm_vm_exit)(struct kvm_vcpu *vcpu, int ret); + void (*init_sel2_hypervisor)(void); + int (*psci_complete)(struct kvm_vcpu *calling, struct kvm_vcpu *target, + unsigned long status); + int (*create_vcpu)(struct kvm_vcpu *vcpu); + void (*destroy_vcpu)(struct kvm_vcpu *vcpu); + void (*destroy_vm)(struct kvm *kvm); + int (*enable_realm)(struct kvm *kvm); + u32 (*vgic_nr_lr)(void); +} ____cacheline_aligned; + +struct cca_share_pages_operations { + int (*alloc_shared_pages)(int p1, gfp_t p2, unsigned int p3); + void (*free_shared_pages)(void *p1, unsigned int p2); +} ____cacheline_aligned; + +int __init cca_operations_register(enum cca_cvm_type type, struct cca_operations *ops); +int __init cca_share_pages_ops_register(enum cca_cvm_type type, + struct cca_share_pages_operations *ops); + +int kvm_get_cvm_type(void); + +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); +void kvm_init_rme(void); + +int kvm_rec_enter(struct kvm_vcpu *vcpu); +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret); + +int kvm_init_realm_vm(struct kvm *kvm); +void kvm_destroy_realm(struct kvm *kvm); + +int kvm_create_rec(struct kvm_vcpu *vcpu); +void kvm_destroy_rec(struct kvm_vcpu *vcpu); + +int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status); + +u32 kvm_realm_vgic_nr_lr(void); + +bool _kvm_is_realm(struct kvm *kvm); + +#endif /* __CCA_BASE_H */ diff --git a/arch/arm64/include/asm/cca_type.h b/arch/arm64/include/asm/cca_type.h new file mode 100644 index 0000000000000..480e8e266af4a --- /dev/null +++ b/arch/arm64/include/asm/cca_type.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ */ +#ifndef __CCA_TYPE_H +#define __CCA_TYPE_H + +enum cca_cvm_type { + ARMCCA_CVM, + VIRTCCA_CVM, + CCA_CVM_MAX, +}; + +#endif /* __CCA_TYPE_H */ diff --git a/arch/arm64/include/asm/virtcca_cvm_host.h b/arch/arm64/include/asm/virtcca_cvm_host.h index c379ad17cffef..3d55928874adf 100644 --- a/arch/arm64/include/asm/virtcca_cvm_host.h +++ b/arch/arm64/include/asm/virtcca_cvm_host.h @@ -4,6 +4,7 @@ */ #ifndef __VIRTCCA_CVM_HOST_H #define __VIRTCCA_CVM_HOST_H +#include #ifdef CONFIG_HISI_VIRTCCA_HOST @@ -11,6 +12,7 @@ #define UEFI_SIZE 0x8000000 bool is_virtcca_cvm_enable(void); +void set_cca_cvm_type(int type); #else @@ -19,5 +21,7 @@ static inline bool is_virtcca_cvm_enable(void) return false; } -#endif /* CONFIG_HISI_VIRTCCA_GUEST */ -#endif /* __VIRTCCA_CVM_GUEST_H */ +static inline void set_cca_cvm_type(int type) {} + +#endif /* CONFIG_HISI_VIRTCCA_HOST */ +#endif /* __VIRTCCA_CVM_HOST_H */ diff --git a/arch/arm64/kernel/virtcca_cvm_host.c b/arch/arm64/kernel/virtcca_cvm_host.c index 4522782b1342e..c70c90542427d 100644 --- a/arch/arm64/kernel/virtcca_cvm_host.c +++ b/arch/arm64/kernel/virtcca_cvm_host.c @@ -28,8 +28,10 @@ static int __init setup_virtcca_cvm_host(char *str) if (ret) { pr_warn("Unable to parse cvm_guest.\n"); } else { - if (val) + if (val) { static_branch_enable(&virtcca_cvm_is_enable); + set_cca_cvm_type(VIRTCCA_CVM); + } } return ret; } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index dacae31420058..fb78e8674c48b 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -21,7 +21,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o \ - rme.o rme-exit.o + rme.o rme-exit.o cca_base.o kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o diff --git a/arch/arm64/kvm/cca_base.c b/arch/arm64/kvm/cca_base.c new file mode 100644 index 0000000000000..19999d4a8a064 --- /dev/null +++ b/arch/arm64/kvm/cca_base.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +static int cca_cvm_type; +static struct cca_operations *g_cca_operations[CCA_CVM_MAX]; + +/* please use 'cca_cvm_type=$type' to enable cca cvm feature */ +static int __init setup_cca_cvm_type(char *str) +{ + int ret; + unsigned int val; + + if (!str) + return 0; + + ret = kstrtouint(str, 10, &val); + if (ret) { + pr_warn("Unable to parse cca cvm_type.\n"); + } else { + if (val >= ARMCCA_CVM && val < CCA_CVM_MAX) + cca_cvm_type = val; + } + return ret; +} +early_param("cca_cvm_type", setup_cca_cvm_type); + +int __init cca_operations_register(enum cca_cvm_type type, struct cca_operations *ops) +{ + if (type >= CCA_CVM_MAX) + return -EINVAL; + + g_cca_operations[type] = ops; + return 0; +} + +int kvm_get_cvm_type(void) +{ + return cca_cvm_type; +} + +void set_cca_cvm_type(int type) +{ + cca_cvm_type = type; +} +EXPORT_SYMBOL_GPL(set_cca_cvm_type); + +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + if (g_cca_operations[cca_cvm_type]->enable_cap) + return g_cca_operations[cca_cvm_type]->enable_cap(kvm, cap); + return 0; +} + +int kvm_init_realm_vm(struct kvm *kvm) +{ + if (g_cca_operations[cca_cvm_type]->init_realm_vm) + return g_cca_operations[cca_cvm_type]->init_realm_vm(kvm); + return 0; +} + +int kvm_rec_enter(struct kvm_vcpu *vcpu) +{ + if (g_cca_operations[cca_cvm_type]->realm_vm_enter) + return g_cca_operations[cca_cvm_type]->realm_vm_enter(vcpu); + return 0; +} + +int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) +{ + if (g_cca_operations[cca_cvm_type]->realm_vm_exit) + return g_cca_operations[cca_cvm_type]->realm_vm_exit(vcpu, rec_run_ret); + return 0; +} + +void kvm_destroy_realm(struct kvm *kvm) +{ + if (g_cca_operations[cca_cvm_type]->destroy_vm) + g_cca_operations[cca_cvm_type]->destroy_vm(kvm); +} + +int kvm_create_rec(struct kvm_vcpu *vcpu) +{ + if (g_cca_operations[cca_cvm_type]->create_vcpu) + return g_cca_operations[cca_cvm_type]->create_vcpu(vcpu); + return 0; +} + +void kvm_destroy_rec(struct kvm_vcpu *vcpu) +{ + if (g_cca_operations[cca_cvm_type]->destroy_vcpu) + g_cca_operations[cca_cvm_type]->destroy_vcpu(vcpu); +} + +void kvm_init_rme(void) +{ + if (g_cca_operations[cca_cvm_type]->init_sel2_hypervisor) + g_cca_operations[cca_cvm_type]->init_sel2_hypervisor(); +} + +int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status) +{ + if (g_cca_operations[cca_cvm_type]->psci_complete) + return g_cca_operations[cca_cvm_type]->psci_complete(calling, target, status); + return 0; +} + +u32 kvm_realm_vgic_nr_lr(void) +{ + if (g_cca_operations[cca_cvm_type]->vgic_nr_lr) + return g_cca_operations[cca_cvm_type]->vgic_nr_lr(); + return 0; +} -- Gitee From c14cb02caf03980fdbba19fc2b0e1f5645b7c3ad Mon Sep 17 00:00:00 2001 From: yxk Date: Mon, 7 Apr 2025 19:06:07 +0000 Subject: [PATCH 49/52] Adapt realm operations by cca_base. virtcca inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- Refactor CCA implementation to adopt shared code, ensuring CCA module utilizes cca_base with minimal modification. 
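For context, a minimal sketch of the userspace path these callbacks serve (assuming the usual KVM_ENABLE_CAP convention and the KVM_CAP_ARM_RME* constants used elsewhere in this series); the generic helpers in cca_base.c simply forward to whichever backend registered itself for the active cca_cvm_type:

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_ARM_RME,
		.args = { KVM_CAP_ARM_RME_ACTIVATE_REALM },
	};

	/* VM ioctl -> kvm_realm_enable_cap() in cca_base.c
	 *          -> g_cca_operations[cca_cvm_type]->enable_cap()
	 */
	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);

The backend slot is selected either by the `cca_cvm_type=` boot parameter (0 for ARMCCA_CVM, 1 for VIRTCCA_CVM, matching enum cca_cvm_type) or by set_cca_cvm_type() from the VirtCCA host setup code.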
Signed-off-by: hjx_gitff Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/cca_base.h | 2 -- arch/arm64/include/asm/kvm_emulate.h | 11 ++++++++ arch/arm64/include/asm/kvm_rme.h | 20 +++++++-------- arch/arm64/kvm/guest.c | 2 +- arch/arm64/kvm/inject_fault.c | 2 +- arch/arm64/kvm/mmu.c | 8 +++--- arch/arm64/kvm/pmu-emul.c | 1 + arch/arm64/kvm/reset.c | 2 +- arch/arm64/kvm/rme-exit.c | 2 +- arch/arm64/kvm/rme.c | 38 +++++++++++++++++++++------- 10 files changed, 59 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/cca_base.h b/arch/arm64/include/asm/cca_base.h index 685240d6abe38..22dbf2e687380 100644 --- a/arch/arm64/include/asm/cca_base.h +++ b/arch/arm64/include/asm/cca_base.h @@ -57,6 +57,4 @@ int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsig u32 kvm_realm_vgic_nr_lr(void); -bool _kvm_is_realm(struct kvm *kvm); - #endif /* __CCA_BASE_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4b155fa39d2a6..2cc8007c522f2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -20,6 +20,7 @@ #include #include #include +#include #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 @@ -650,6 +651,11 @@ static inline bool kvm_is_realm(struct kvm *kvm) return false; } +static inline bool _kvm_is_realm(struct kvm *kvm) +{ + return kvm_is_realm(kvm) && (kvm_get_cvm_type() == ARMCCA_CVM); +} + static inline enum realm_state kvm_realm_state(struct kvm *kvm) { return READ_ONCE(kvm->arch.realm.state); @@ -677,6 +683,11 @@ static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu) return false; } +static inline bool _vcpu_is_rec(struct kvm_vcpu *vcpu) +{ + return vcpu_is_rec(vcpu) && (kvm_get_cvm_type() == ARMCCA_CVM); +} + static inline bool kvm_arm_vcpu_rec_finalized(struct kvm_vcpu *vcpu) { return vcpu->arch.rec.mpidr != INVALID_HWID; diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h index 513708916e074..93389c5db752e 100644 --- a/arch/arm64/include/asm/kvm_rme.h +++ b/arch/arm64/include/asm/kvm_rme.h @@ -83,9 +83,9 @@ struct realm_rec { struct rec_run *run; }; -void kvm_init_rme(void); +void _kvm_init_rme(void); u32 kvm_realm_ipa_limit(void); -u32 kvm_realm_vgic_nr_lr(void); +u32 _kvm_realm_vgic_nr_lr(void); u32 kvm_realm_get_num_brps(void); u32 kvm_realm_get_num_wrps(void); unsigned int kvm_realm_sve_max_vl(void); @@ -94,15 +94,15 @@ u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); bool kvm_rme_supports_sve(void); -int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); -int kvm_init_realm_vm(struct kvm *kvm); -void kvm_destroy_realm(struct kvm *kvm); +int _kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); +int _kvm_init_realm_vm(struct kvm *kvm); +void _kvm_destroy_realm(struct kvm *kvm); void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); -int kvm_create_rec(struct kvm_vcpu *vcpu); -void kvm_destroy_rec(struct kvm_vcpu *vcpu); +int _kvm_create_rec(struct kvm_vcpu *vcpu); +void _kvm_destroy_rec(struct kvm_vcpu *vcpu); -int kvm_rec_enter(struct kvm_vcpu *vcpu); -int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status); +int _kvm_rec_enter(struct kvm_vcpu *vcpu); +int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status); void kvm_realm_unmap_range(struct kvm *kvm, unsigned long ipa, @@ -122,7 +122,7 @@ int realm_set_ipa_state(struct kvm_vcpu *vcpu, unsigned long addr, unsigned long end, unsigned long ripas, unsigned long 
*top_ipa); -int realm_psci_complete(struct kvm_vcpu *calling, +int _realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 09f2a5761943b..42dd9ba8daa1c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -870,7 +870,7 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) return -EINVAL; - if (kvm_is_realm(vcpu->kvm) && !validate_realm_set_reg(vcpu, reg)) + if (_kvm_is_realm(vcpu->kvm) && !validate_realm_set_reg(vcpu, reg)) return -EINVAL; switch (reg->id & KVM_REG_ARM_COPROC_MASK) { diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 1542b2605113c..ee19708fc611a 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -165,7 +165,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) */ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { - if (unlikely(vcpu_is_rec(vcpu))) + if (unlikely(_vcpu_is_rec(vcpu))) vcpu->arch.rec.run->enter.flags |= REC_ENTER_INJECT_SEA; else if (vcpu_el1_is_32bit(vcpu)) inject_abt32(vcpu, false, addr); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1e053f06edcc3..fece849076373 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -884,7 +884,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t u64 mmfr0, mmfr1; u32 phys_shift; - if (kvm_is_realm(kvm)) + if (_kvm_is_realm(kvm)) kvm_ipa_limit = kvm_realm_ipa_limit(); phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); @@ -1518,7 +1518,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; - } else if (kvm_is_realm(kvm)) { + } else if (_kvm_is_realm(kvm)) { // Force PTE level mappings for realms force_pte = true; vma_shift = PAGE_SHIFT; @@ -1622,7 +1622,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * backed by a THP and thus use block mapping if possible. 
*/ /* FIXME: We shouldn't need to disable this for realms */ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device || kvm_is_realm(kvm))) { + if (vma_pagesize == PAGE_SIZE && !(force_pte || device || _kvm_is_realm(kvm))) { if (fault_status == ESR_ELx_FSC_PERM && fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; @@ -1670,7 +1670,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); - else if (kvm_is_realm(kvm)) + else if (_kvm_is_realm(kvm)) ret = realm_map_ipa(kvm, fault_ipa, pfn, vma_pagesize, prot, memcache); else diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 4b0403d52bfec..879f9da1fc74e 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -15,6 +15,7 @@ #include #include #include +#include #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index cd1b39690378e..191658c8ee801 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -162,7 +162,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu)) return false; - if (kvm_is_realm(vcpu->kvm) && + if (_kvm_is_realm(vcpu->kvm) && !(vcpu_is_rec(vcpu) && kvm_arm_vcpu_rec_finalized(vcpu) && READ_ONCE(vcpu->kvm->arch.realm.state) == REALM_STATE_ACTIVE)) return false; diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c index 033f63449487a..53b60bbfc20b8 100644 --- a/arch/arm64/kvm/rme-exit.c +++ b/arch/arm64/kvm/rme-exit.c @@ -135,7 +135,7 @@ static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. 
*/ -int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) +int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) { struct realm_rec *rec = &vcpu->arch.rec; u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr); diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c index 1ae33e15181a7..588aa69cfb74c 100644 --- a/arch/arm64/kvm/rme.c +++ b/arch/arm64/kvm/rme.c @@ -12,6 +12,7 @@ #include #include +#include static unsigned long rmm_feat_reg0; @@ -58,7 +59,7 @@ u32 kvm_realm_ipa_limit(void) return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ); } -u32 kvm_realm_vgic_nr_lr(void) +u32 _kvm_realm_vgic_nr_lr(void) { return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS); } @@ -161,7 +162,7 @@ static void free_delegated_granule(phys_addr_t phys) free_page((unsigned long)phys_to_virt(phys)); } -int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, +int _realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status) { int ret; @@ -1288,7 +1289,7 @@ static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) return r; } -int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +int _kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { int r = 0; @@ -1337,7 +1338,7 @@ int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) return r; } -void kvm_destroy_realm(struct kvm *kvm) +void _kvm_destroy_realm(struct kvm *kvm) { struct realm *realm = &kvm->arch.realm; size_t pgd_size = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); @@ -1403,7 +1404,7 @@ static void kvm_complete_ripas_change(struct kvm_vcpu *vcpu) } while (top_ipa < top); } -int kvm_rec_enter(struct kvm_vcpu *vcpu) +int _kvm_rec_enter(struct kvm_vcpu *vcpu) { struct realm_rec *rec = &vcpu->arch.rec; @@ -1466,7 +1467,7 @@ static int alloc_rec_aux(struct page **aux_pages, return ret; } -int kvm_create_rec(struct kvm_vcpu *vcpu) +int _kvm_create_rec(struct kvm_vcpu *vcpu) { struct user_pt_regs *vcpu_regs = vcpu_gp_regs(vcpu); unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); @@ -1546,7 +1547,7 @@ int kvm_create_rec(struct kvm_vcpu *vcpu) return r; } -void kvm_destroy_rec(struct kvm_vcpu *vcpu) +void _kvm_destroy_rec(struct kvm_vcpu *vcpu) { struct realm *realm = &vcpu->kvm->arch.realm; struct realm_rec *rec = &vcpu->arch.rec; @@ -1572,7 +1573,7 @@ void kvm_destroy_rec(struct kvm_vcpu *vcpu) free_delegated_granule(rec_page_phys); } -int kvm_init_realm_vm(struct kvm *kvm) +int _kvm_init_realm_vm(struct kvm *kvm) { struct realm_params *params; @@ -1584,7 +1585,7 @@ int kvm_init_realm_vm(struct kvm *kvm) return 0; } -void kvm_init_rme(void) +void _kvm_init_rme(void) { if (PAGE_SIZE != SZ_4K) /* Only 4k page size on the host is supported */ @@ -1602,3 +1603,22 @@ void kvm_init_rme(void) static_branch_enable(&kvm_rme_is_available); } + +static struct cca_operations armcca_operations = { + .enable_cap = _kvm_realm_enable_cap, + .init_realm_vm = _kvm_init_realm_vm, + .realm_vm_enter = _kvm_rec_enter, + .realm_vm_exit = _handle_rec_exit, + .init_sel2_hypervisor = _kvm_init_rme, + .psci_complete = _realm_psci_complete, + .destroy_vm = _kvm_destroy_realm, + .create_vcpu = _kvm_create_rec, + .destroy_vcpu = _kvm_destroy_rec, + .vgic_nr_lr = _kvm_realm_vgic_nr_lr, +}; + +static int __init armcca_register(void) +{ + return cca_operations_register(ARMCCA_CVM, &armcca_operations); +} +core_initcall(armcca_register); -- Gitee From a41895a1f6b6eaad548284b8c0c48039e7502887 Mon Sep 17 00:00:00 2001 From: hjx_gitff Date: Mon, 7 Apr 
2025 20:25:08 +0000 Subject: [PATCH 50/52] Adapt virtcca operations by cca_base. virtcca inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- Refactor the VirtCCA code to reuse the cca_base code as much as possible. Let VirtCCA and CCA share the common components vcpu_is_rec and kvm_is_realm, while retaining vcpu_is_tec as a VirtCCA-specific identifier to distinguish scenarios where code sharing is not feasible. Signed-off-by: hjx_gitff Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- arch/arm64/include/asm/kvm_emulate.h | 12 ++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/kvm_tmi.h | 4 +- arch/arm64/include/asm/kvm_tmm.h | 15 +- arch/arm64/include/uapi/asm/kvm.h | 3 + arch/arm64/kvm/Makefile | 3 + arch/arm64/kvm/arch_timer.c | 4 - arch/arm64/kvm/arm.c | 7 +- arch/arm64/kvm/guest.c | 9 +- arch/arm64/kvm/mmio.c | 13 +- arch/arm64/kvm/mmu.c | 2 + arch/arm64/kvm/pmu-emul.c | 7 +- arch/arm64/kvm/vgic/vgic-v3.c | 16 +- arch/arm64/kvm/vgic/vgic.c | 35 +++-- arch/arm64/kvm/virtcca_cvm.c | 92 ++++++----- arch/arm64/kvm/virtcca_cvm_exit.c | 220 +++++++++++++++++++++++++++ include/linux/kvm_host.h | 14 ++ include/uapi/linux/kvm.h | 13 ++ 18 files changed, 405 insertions(+), 65 deletions(-) create mode 100644 arch/arm64/kvm/virtcca_cvm_exit.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 2cc8007c522f2..bb526e2181a3e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -693,4 +693,16 @@ static inline bool kvm_arm_vcpu_rec_finalized(struct kvm_vcpu *vcpu) return vcpu->arch.rec.mpidr != INVALID_HWID; } +#ifdef CONFIG_HISI_VIRTCCA_HOST + +static inline enum virtcca_cvm_state virtcca_cvm_state(struct kvm *kvm) +{ + struct virtcca_cvm *virtcca_cvm = kvm->arch.virtcca_cvm; + + if (!virtcca_cvm) + return 0; + return READ_ONCE(virtcca_cvm->state); +} +#endif + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 093684c70b1fd..872f1cb892b73 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -637,6 +637,7 @@ struct kvm_vcpu_arch { /* Realm meta data */ struct realm_rec rec; + #ifdef CONFIG_KVM_HISI_VIRT /* pCPUs this vCPU can be scheduled on. Pure copy of current->cpus_ptr */ cpumask_var_t sched_cpus; diff --git a/arch/arm64/include/asm/kvm_tmi.h b/arch/arm64/include/asm/kvm_tmi.h index 11eacf161a02b..f31f9778972d3 100644 --- a/arch/arm64/include/asm/kvm_tmi.h +++ b/arch/arm64/include/asm/kvm_tmi.h @@ -146,8 +146,8 @@ struct tmi_tec_exit { }; struct tmi_tec_run { - struct tmi_tec_entry tec_entry; - struct tmi_tec_exit tec_exit; + struct tmi_tec_entry enter; + struct tmi_tec_exit exit; }; #define TMI_FNUM_MIN_VALUE U(0x150) diff --git a/arch/arm64/include/asm/kvm_tmm.h b/arch/arm64/include/asm/kvm_tmm.h index f6e773c4aa13c..6ae219cef8def 100644 --- a/arch/arm64/include/asm/kvm_tmm.h +++ b/arch/arm64/include/asm/kvm_tmm.h @@ -6,6 +6,7 @@ #define __ASM_KVM_TMM_H #include +#include /* * There is a conflict with the internal iova of CVM, @@ -14,10 +15,10 @@ * iova is not being used, so it is used as the iova range for msi * mapping. 
*/ -#define CVM_MSI_ORIG_IOVA 0x8000000 -#define CVM_MSI_MIN_IOVA 0x0a001000 -#define CVM_MSI_MAX_IOVA 0x0b000000 -#define CVM_MSI_IOVA_OFFSET 0x1000 +#define CVM_MSI_ORIG_IOVA 0x8000000 +#define CVM_MSI_MIN_IOVA 0x0a001000 +#define CVM_MSI_MAX_IOVA 0x0b000000 +#define CVM_MSI_IOVA_OFFSET 0x1000 #define CVM_RW_8_BIT 0x8 #define CVM_RW_16_BIT 0x10 @@ -101,7 +102,7 @@ struct virtcca_cvm { struct virtcca_cvm_tec { u64 tec; bool tec_created; - void *tec_run; + struct tmi_tec_run *run; }; struct cvm_ttt_addr { @@ -109,7 +110,7 @@ struct cvm_ttt_addr { u64 addr; }; -int kvm_init_tmm(void); +void kvm_init_tmm(void); int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); void kvm_destroy_cvm(struct kvm *kvm); int kvm_finalize_vcpu_tec(struct kvm_vcpu *vcpu); @@ -118,7 +119,7 @@ int kvm_tec_enter(struct kvm_vcpu *vcpu); int handle_cvm_exit(struct kvm_vcpu *vcpu, int rec_run_status); int kvm_arm_create_cvm(struct kvm *kvm); void kvm_free_rd(struct kvm *kvm); -int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target); +int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status); void kvm_cvm_unmap_destroy_range(struct kvm *kvm); int kvm_cvm_map_range(struct kvm *kvm); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 050b29651b6c3..e2319c3384e29 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -416,6 +416,9 @@ enum { #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 +#define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256 0 +#define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512 1 + /* KVM_CAP_ARM_RME on VM fd */ #define KVM_CAP_ARM_RME_CONFIG_REALM 0 #define KVM_CAP_ARM_RME_CREATE_RD 1 diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index fb78e8674c48b..f48a34d5b0bc0 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -25,6 +25,9 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o +kvm-$(CONFIG_HISI_VIRTCCA_HOST) += tmi.o +kvm-$(CONFIG_HISI_VIRTCCA_HOST) += virtcca_cvm.o +kvm-$(CONFIG_HISI_VIRTCCA_HOST) += virtcca_cvm_exit.o obj-$(CONFIG_KVM_HISI_VIRT) += hisilicon/ always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 0605ac4adfb83..69b9bd5e8eaca 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -250,10 +250,6 @@ static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) return; } -#ifdef CONFIG_HISI_VIRTCCA_HOST - if (kvm_is_virtcca_cvm(ctxt->vcpu->kvm)) - return; -#endif if (!ctxt->offset.vm_offset) { WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); return; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c998286123ca0..366c432b5746e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -42,7 +42,7 @@ #include #include #include - +#include #include #include #include @@ -1943,6 +1943,11 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) struct kvm_device_attr attr; switch (ioctl) { +#ifdef CONFIG_HISI_VIRTCCA_HOST + case KVM_LOAD_USER_DATA: { + return kvm_load_user_data(kvm, arg); + } +#endif case KVM_CREATE_IRQCHIP: { int ret; if (!vgic_present) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 42dd9ba8daa1c..5f578e91875c9 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -26,6 +26,7 
@@ #include #include #include +#include #include "trace.h" @@ -171,7 +172,7 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off)) return -EINVAL; - if (kvm_is_realm(vcpu->kvm) && !kvm_realm_validate_core_reg(off)) + if (_kvm_is_realm(vcpu->kvm) && !kvm_realm_validate_core_reg(off)) return -EPERM; return size; @@ -924,7 +925,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, bool has_esr = events->exception.serror_has_esr; bool ext_dabt_pending = events->exception.ext_dabt_pending; - if (vcpu_is_rec(vcpu)) { + if (_vcpu_is_rec(vcpu)) { /* Cannot inject SError into a Realm. */ if (serror_pending) return -EINVAL; @@ -947,6 +948,10 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, return 0; } +#ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) + return kvm_cvm_vcpu_set_events(vcpu, serror_pending, ext_dabt_pending); +#endif if (serror_pending && has_esr) { if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 113f2c9edaec2..099b7517bd5c4 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "trace.h" @@ -138,8 +139,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) &data); data = vcpu_data_host_to_guest(vcpu, data, len); - if (vcpu_is_rec(vcpu)) + if (_vcpu_is_rec(vcpu)) vcpu->arch.rec.run->enter.gprs[0] = data; +#ifdef CONFIG_HISI_VIRTCCA_HOST + else if (vcpu_is_tec(vcpu)) + vcpu->arch.tec.run->enter.gprs[0] = data; +#endif else vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); } @@ -148,8 +153,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO instruction is emulated and should not be re-executed * in the guest. 
*/ - if (vcpu_is_rec(vcpu)) + if (_vcpu_is_rec(vcpu)) vcpu->arch.rec.run->enter.flags |= REC_ENTER_EMULATED_MMIO; +#ifdef CONFIG_HISI_VIRTCCA_HOST + else if (vcpu_is_tec(vcpu)) + vcpu->arch.tec.run->enter.flags |= REC_ENTER_EMULATED_MMIO; +#endif else kvm_incr_pc(vcpu); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index fece849076373..4b676db3eca06 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1475,6 +1475,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ if (vcpu_is_rec(vcpu)) write_fault = true; + if (vcpu_is_tec(vcpu)) + prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 879f9da1fc74e..fc1ae98b922e5 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -16,6 +16,7 @@ #include #include #include +#include #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) @@ -341,8 +342,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if (vcpu_is_rec(vcpu)) + if (_vcpu_is_rec(vcpu)) return vcpu->arch.rec.run->exit.pmu_ovf_status; +#ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) + return vcpu->arch.tec.run->exit.pmu_ovf_status; +#endif if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b45416bdbb656..97ddf44c93372 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "vgic.h" @@ -763,6 +764,13 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; +#ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) { + cpu_if->vgic_vmcr = vcpu->arch.tec.run->exit.gicv3_vmcr; + return; + } +#endif + if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); } @@ -771,9 +779,13 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - if (vcpu_is_rec(vcpu)) - cpu_if->vgic_vmcr = vcpu->arch.rec.run->exit.gicv3_vmcr; + if (_vcpu_is_rec(vcpu)) + cpu_if->vgic_vmcr = vcpu->arch.rec.run->exit.gicv3_vmcr; +#ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) + cpu_if->vgic_vmcr = vcpu->arch.tec.run->exit.gicv3_vmcr; +#endif WARN_ON(vgic_v4_put(vcpu)); vgic_v3_vmcr_sync(vcpu); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index eee76ab9203e5..fec6c9dd0c27a 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "vgic.h" @@ -912,8 +913,16 @@ static inline void vgic_rmm_save_state(struct kvm_vcpu *vcpu) int i; for (i = 0; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) { - cpu_if->vgic_lr[i] = vcpu->arch.rec.run->exit.gicv3_lrs[i]; - vcpu->arch.rec.run->enter.gicv3_lrs[i] = 0; + if (_vcpu_is_rec(vcpu)) { + cpu_if->vgic_lr[i] = vcpu->arch.rec.run->exit.gicv3_lrs[i]; + vcpu->arch.rec.run->enter.gicv3_lrs[i] = 0; + } +#ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) { + cpu_if->vgic_lr[i] = vcpu->arch.tec.run->exit.gicv3_lrs[i]; + vcpu->arch.tec.run->enter.gicv3_lrs[i] = 0; + } +#endif } } @@ -955,13 +964,21 @@ static inline void vgic_rmm_restore_state(struct kvm_vcpu *vcpu) int i; for (i = 0; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) { - vcpu->arch.rec.run->enter.gicv3_lrs[i] = cpu_if->vgic_lr[i]; - /* - * Also populate 
the rec.run->exit copies so that a late - * decision to back out from entering the realm doesn't cause - * the state to be lost - */ - vcpu->arch.rec.run->exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + if (_vcpu_is_rec(vcpu)) { + vcpu->arch.rec.run->enter.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + /* + * Also populate the rec.run->exit copies so that a late + * decision to back out from entering the realm doesn't cause + * the state to be lost + */ + vcpu->arch.rec.run->exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + } +#ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) { + vcpu->arch.tec.run->enter.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + vcpu->arch.tec.run->exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + } +#endif } } diff --git a/arch/arm64/kvm/virtcca_cvm.c b/arch/arm64/kvm/virtcca_cvm.c index cb2ee7e4fe769..26bbca1dfa3e1 100644 --- a/arch/arm64/kvm/virtcca_cvm.c +++ b/arch/arm64/kvm/virtcca_cvm.c @@ -22,14 +22,15 @@ /* Protects access to cvm_vmid_bitmap */ static DEFINE_SPINLOCK(cvm_vmid_lock); static unsigned long *cvm_vmid_bitmap; -DEFINE_STATIC_KEY_FALSE(virtcca_cvm_is_available); +DECLARE_STATIC_KEY_FALSE(virtcca_cvm_is_enable); #define SIMD_PAGE_SIZE 0x3000 #define UEFI_MAX_SIZE 0x8000000 #define UEFI_DTB_START 0x40000000 #define DTB_MAX_SIZE 0x200000 + int kvm_enable_virtcca_cvm(struct kvm *kvm) { - if (!static_key_enabled(&virtcca_cvm_is_available)) + if (!static_key_enabled(&kvm_rme_is_available)) return -EFAULT; kvm->arch.is_virtcca_cvm = true; @@ -137,7 +138,7 @@ int kvm_arm_create_cvm(struct kvm *kvm) /* get affine host numa set by default vcpu 0 */ u64 numa_set = kvm_get_host_numa_set_by_vcpu(0, kvm); - if (!kvm_is_virtcca_cvm(kvm) || virtcca_cvm_state(kvm) != CVM_STATE_NONE) + if (!kvm_is_realm(kvm) || virtcca_cvm_state(kvm) != CVM_STATE_NONE) return 0; if (!cvm->params) { @@ -398,8 +399,8 @@ int kvm_finalize_vcpu_tec(struct kvm_vcpu *vcpu) struct virtcca_cvm_tec *tec = &vcpu->arch.tec; mutex_lock(&vcpu->kvm->lock); - tec->tec_run = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); - if (!tec->tec_run) { + tec->run = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + if (!tec->run) { ret = -ENOMEM; goto tec_free; } @@ -428,7 +429,7 @@ int kvm_finalize_vcpu_tec(struct kvm_vcpu *vcpu) return ret; tec_free: - kfree(tec->tec_run); + kfree(tec->run); kfree(params_ptr); mutex_unlock(&vcpu->kvm->lock); return ret; @@ -687,7 +688,6 @@ int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { int r = 0; - mutex_lock(&kvm->lock); switch (cap->args[0]) { case KVM_CAP_ARM_TMM_CONFIG_CVM_HOST: r = kvm_tmm_config_cvm(kvm, cap); @@ -713,7 +713,6 @@ int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = -EINVAL; break; } - mutex_unlock(&kvm->lock); return r; } @@ -722,14 +721,14 @@ void kvm_destroy_tec(struct kvm_vcpu *vcpu) { struct virtcca_cvm_tec *tec = &vcpu->arch.tec; - if (!vcpu_is_tec(vcpu)) + if (!vcpu_is_rec(vcpu)) return; if (tmi_tec_destroy(tec->tec) != 0) kvm_err("%s vcpu id : %d failed!\n", __func__, vcpu->vcpu_id); tec->tec = 0; - kfree(tec->tec_run); + kfree(tec->run); } static int tmi_check_version(void) @@ -761,25 +760,25 @@ int kvm_tec_enter(struct kvm_vcpu *vcpu) struct virtcca_cvm_tec *tec = &vcpu->arch.tec; struct virtcca_cvm *cvm = vcpu->kvm->arch.virtcca_cvm; + run = (struct tmi_tec_run *)tec->run; if (READ_ONCE(cvm->state) != CVM_STATE_ACTIVE) return -EINVAL; - run = tec->tec_run; /* set/clear TWI TWE flags */ if (vcpu->arch.hcr_el2 & HCR_TWI) - run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFI; + run->enter.flags |= TEC_ENTRY_FLAG_TRAP_WFI; else - run->tec_entry.flags &= 
~TEC_ENTRY_FLAG_TRAP_WFI; + run->enter.flags &= ~TEC_ENTRY_FLAG_TRAP_WFI; if (vcpu->arch.hcr_el2 & HCR_TWE) - run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFE; + run->enter.flags |= TEC_ENTRY_FLAG_TRAP_WFE; else - run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFE; + run->enter.flags &= ~TEC_ENTRY_FLAG_TRAP_WFE; return tmi_tec_enter(tec->tec, __pa(run)); } -int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target) +int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status) { int ret; struct virtcca_cvm_tec *calling_tec = &calling->arch.tec; @@ -791,29 +790,30 @@ int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target) return 0; } -int kvm_init_tmm(void) +void kvm_init_tmm(void) { int ret; if (PAGE_SIZE != SZ_4K) - return 0; + return; if (tmi_check_version()) - return 0; + return; if (tmi_kae_init()) pr_warn("kvm [%i]: Warning: kae init failed!\n", task_pid_nr(current)); ret = cvm_vmid_init(); if (ret) - return ret; + return; tmm_feat_reg0 = tmi_features(0); kvm_info("TMM feature0: 0x%lx\n", tmm_feat_reg0); - static_branch_enable(&virtcca_cvm_is_available); + static_branch_enable(&kvm_rme_is_available); + static_branch_enable(&virtcca_cvm_is_enable); - return 0; + return; } u64 virtcca_get_tmi_version(void) @@ -857,7 +857,7 @@ int kvm_load_user_data(struct kvm *kvm, unsigned long arg) struct virtcca_cvm *cvm = kvm->arch.virtcca_cvm; struct kvm_numa_info *numa_info; - if (!kvm_is_virtcca_cvm(kvm)) + if (!kvm_is_realm(kvm)) return -EFAULT; if (copy_from_user(&user_data, argp, sizeof(user_data))) @@ -932,27 +932,22 @@ unsigned long cvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu, if (!target_vcpu) return PSCI_RET_INVALID_PARAMS; - cvm_psci_complete(vcpu, target_vcpu); + cvm_psci_complete(vcpu, target_vcpu, PSCI_RET_SUCCESS); return PSCI_RET_SUCCESS; } int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu, bool serror_pending, bool ext_dabt_pending) { - struct virtcca_cvm_tec *tec = &vcpu->arch.tec; - + struct tmi_tec_run *run = vcpu->arch.tec.run; if (serror_pending) return -EINVAL; if (ext_dabt_pending) { - if (!(((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags & - TEC_ENTRY_FLAG_EMUL_MMIO)) + if (!(run->enter.flags & REC_ENTER_EMULATED_MMIO)) return -EINVAL; - - ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags - &= ~TEC_ENTRY_FLAG_EMUL_MMIO; - ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags - |= TEC_ENTRY_FLAG_INJECT_SEA; + run->enter.flags &= ~REC_ENTER_EMULATED_MMIO; + run->enter.flags |= REC_ENTER_INJECT_SEA; } return 0; } @@ -982,9 +977,36 @@ int kvm_init_cvm_vm(struct kvm *kvm) cvm->params = params; WRITE_ONCE(cvm->state, CVM_STATE_NONE); + kvm_enable_virtcca_cvm(kvm); return 0; } +extern struct vgic_global kvm_vgic_global_state; + +u32 kvm_cvm_vgic_nr_lr(void) +{ + return kvm_vgic_global_state.nr_lr; +} + +static struct cca_operations virtcca_operations = { + .enable_cap = kvm_cvm_enable_cap, + .init_realm_vm = kvm_init_cvm_vm, + .realm_vm_enter = kvm_tec_enter, + .realm_vm_exit = handle_cvm_exit, + .init_sel2_hypervisor = kvm_init_tmm, + .psci_complete = cvm_psci_complete, + .destroy_vm = kvm_destroy_cvm, + .create_vcpu = kvm_finalize_vcpu_tec, + .destroy_vcpu = kvm_destroy_tec, + .vgic_nr_lr = kvm_cvm_vgic_nr_lr, +}; + +static int __init virtcca_register(void) +{ + return cca_operations_register(VIRTCCA_CVM, &virtcca_operations); +} +core_initcall(virtcca_register); + #ifdef CONFIG_HISI_VIRTCCA_CODA /* * Coda (Confidential Device Assignment) feature @@ -1244,7 +1266,7 @@ int kvm_cvm_map_ipa_mmio(struct kvm 
*kvm, phys_addr_t ipa_base, int kvm_cvm_map_ipa(struct kvm *kvm, phys_addr_t ipa, kvm_pfn_t pfn, unsigned long map_size, enum kvm_pgtable_prot prot, int ret) { - if (!is_virtcca_cvm_enable() || !kvm_is_virtcca_cvm(kvm)) + if (!is_virtcca_cvm_enable() || !kvm_is_realm(kvm)) return ret; struct page *dst_page = pfn_to_page(pfn); @@ -1295,7 +1317,7 @@ int cvm_arm_smmu_domain_set_kvm(struct device *dev, void *data) return 1; kvm = virtcca_arm_smmu_get_kvm(arm_smmu_domain); - if (kvm && kvm_is_virtcca_cvm(kvm)) + if (kvm && kvm_is_realm(kvm)) arm_smmu_domain->kvm = kvm; return 1; diff --git a/arch/arm64/kvm/virtcca_cvm_exit.c b/arch/arm64/kvm/virtcca_cvm_exit.c new file mode 100644 index 0000000000000..be14e08e136e9 --- /dev/null +++ b/arch/arm64/kvm/virtcca_cvm_exit.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025. Huawei Technologies Co., Ltd. All rights reserved. + */ +#include +#include +#include + +#include +#include +#include + +typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); + +static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu, bool unmask_ctl) +{ + struct tmi_tec_run *run = vcpu->arch.tec.run; + + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = run->exit.cntv_ctl; + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = run->exit.cntv_cval; + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = run->exit.cntp_ctl; + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = run->exit.cntp_cval; + + /* Because the timer mask is tainted by the TMM, we don't know the + * true intent of the guest. Here, we assume the mask is always + * cleared during WFI. + */ + if (unmask_ctl) { + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK; + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) &= ~ARCH_TIMER_CTRL_IT_MASK; + } + + kvm_cvm_timers_update(vcpu); +} + +static int tec_exit_reason_notimpl(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.run; + + pr_err("[vcpu %d] Unhandled exit reason from cvm (ESR: %#llx)\n", + vcpu->vcpu_id, run->exit.esr); + return -ENXIO; +} + +/* The process is the same as kvm_handle_wfx, + * except for the tracing and the PC update; + * the logic is copied here + * to avoid changing kvm_handle_wfx itself. 
+ */ +static int tec_exit_wfx(struct kvm_vcpu *vcpu) +{ + u64 esr = kvm_vcpu_get_esr(vcpu); + + if (esr & ESR_ELx_WFx_ISS_WFE) + vcpu->stat.wfe_exit_stat++; + else + vcpu->stat.wfi_exit_stat++; + + if (esr & ESR_ELx_WFx_ISS_WFxT) { + if (esr & ESR_ELx_WFx_ISS_RV) { + u64 val, now; + + now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + if (now >= val) + goto out; + } else { + /* Treat WFxT as WFx if RN is invalid */ + esr &= ~ESR_ELx_WFx_ISS_WFxT; + } + } + + if (esr & ESR_ELx_WFx_ISS_WFE) { + kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); + } else { + if (esr & ESR_ELx_WFx_ISS_WFxT) + vcpu_set_flag(vcpu, IN_WFIT); + + kvm_vcpu_wfi(vcpu); + } + +out: + return 1; +} + +static int tec_exit_sys_reg(struct kvm_vcpu *vcpu) +{ + int ret; + struct tmi_tec_run *run = vcpu->arch.tec.run; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int rt = kvm_vcpu_sys_get_rt(vcpu); + bool is_write = !(esr & 1); + + if (is_write) + vcpu_set_reg(vcpu, rt, run->exit.gprs[0]); + + ret = kvm_handle_sys_reg(vcpu); + + if (ret >= 0 && !is_write) + run->enter.gprs[0] = vcpu_get_reg(vcpu, rt); + + return ret; +} + +static int tec_exit_sync_dabt(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.run; + + if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) { + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), + run->exit.gprs[0]); + } + return kvm_handle_guest_abort(vcpu); +} + +static int tec_exit_sync_iabt(struct kvm_vcpu *vcpu) +{ + struct tmi_tec_run *run = vcpu->arch.tec.run; + + pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", + vcpu->vcpu_id, run->exit.esr); + + return -ENXIO; +} + +static exit_handler_fn tec_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = tec_exit_reason_notimpl, + [ESR_ELx_EC_WFx] = tec_exit_wfx, + [ESR_ELx_EC_SYS64] = tec_exit_sys_reg, + [ESR_ELx_EC_DABT_LOW] = tec_exit_sync_dabt, + [ESR_ELx_EC_IABT_LOW] = tec_exit_sync_iabt +}; + +static int tec_exit_psci(struct kvm_vcpu *vcpu) +{ + int i; + struct tmi_tec_run *run = vcpu->arch.tec.run; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + vcpu_set_reg(vcpu, i, run->exit.gprs[i]); + + return kvm_psci_call(vcpu); +} + +static int tec_exit_host_call(struct kvm_vcpu *vcpu) +{ + int ret, i; + struct tmi_tec_run *run = vcpu->arch.tec.run; + + vcpu->stat.hvc_exit_stat++; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + vcpu_set_reg(vcpu, i, run->exit.gprs[i]); + + ret = kvm_smccc_call_handler(vcpu); + + if (ret < 0) { + vcpu_set_reg(vcpu, 0, ~0UL); + ret = 1; + } + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) + run->enter.gprs[i] = vcpu_get_reg(vcpu, i); + + return ret; +} + +/* + * Return > 0 to return to guest, < 0 on error, 0(and set exit_reason) on + * proper exit to userspace + */ + +int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) +{ + unsigned long status; + struct tmi_tec_run *run = vcpu->arch.tec.run; + u8 esr_ec = ESR_ELx_EC(run->exit.esr); + bool is_wfx; + + status = TMI_RETURN_STATUS(tec_run_ret); + + if (status == TMI_ERROR_CVM_POWEROFF) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SHUTDOWN; + return 0; + } + + if (status == TMI_ERROR_CVM_STATE) { + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + return 0; + } + + if (tec_run_ret) + return -ENXIO; + + vcpu->arch.fault.esr_el2 = run->exit.esr; + vcpu->arch.fault.far_el2 = run->exit.far; + vcpu->arch.fault.hpfar_el2 = run->exit.hpfar; + + is_wfx = (run->exit.exit_reason == TMI_EXIT_SYNC) && (esr_ec == ESR_ELx_EC_WFx); + 
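+ /* A WFx trap means the guest intended to idle, so treat it as an
+  * implicit unmask of the timer interrupts when mirroring the state
+  * reported by the TMM (see update_arch_timer_irq_lines() above).
+  */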
update_arch_timer_irq_lines(vcpu, is_wfx); + + run->enter.flags = 0; + + switch (run->exit.exit_reason) { + case TMI_EXIT_FIQ: + case TMI_EXIT_IRQ: + return 1; + case TMI_EXIT_PSCI: + return tec_exit_psci(vcpu); + case TMI_EXIT_SYNC: + return tec_exit_handlers[esr_ec](vcpu); + case TMI_EXIT_HOST_CALL: + return tec_exit_host_call(vcpu); + } + + kvm_pr_unimpl("Unsupported exit reason : %llu\n", + run->exit.exit_reason); + return 0; +} diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a7790325665b2..06c407b2f6715 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -546,6 +546,20 @@ static __always_inline void guest_state_exit_irqoff(void) instrumentation_end(); } +#ifdef CONFIG_HISI_VIRTCCA_HOST +#define CVM_MAX_HALT_POLL_NS 100000 + +static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.tec.run != NULL); +} +#else +static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) +{ + return false; +} +#endif /* CONFIG_HISI_VIRTCCA_HOST */ + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1229996569eb8..c87c43514aa4c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1540,6 +1540,19 @@ struct kvm_numa_info { #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) + +#define KVM_CAP_ARM_TMM 300 /* FIXME: Large number to prevent conflicts */ + +struct kvm_user_data { + __u64 loader_start; + __u64 image_end; + __u64 initrd_start; + __u64 dtb_end; + __u64 ram_size; + struct kvm_numa_info numa_info; +}; + /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { __u64 user_addr; -- Gitee From 85768e372dba77efaa4902a654e5741d71f603c2 Mon Sep 17 00:00:00 2001 From: yxk Date: Mon, 7 Apr 2025 19:37:01 +0000 Subject: [PATCH 51/52] Reapply "VirtCCA: cvm support UEFI boot" virtcca inclusion category: clean bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- This reverts commit 8ea38c27b1aa13fb351254713d91f62bd4c44b5d. Signed-off-by: hjx_gitff Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- include/uapi/linux/kvm.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c87c43514aa4c..4fb0542e8db0a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1546,9 +1546,15 @@ struct kvm_numa_info { struct kvm_user_data { __u64 loader_start; - __u64 image_end; - __u64 initrd_start; - __u64 dtb_end; + /* + * When the lowest bit of dtb_info is 0, the value of dtb_info represents the size of the + * DTB, and data_start and data_size represent the address base and size of the MMIO. + * When the lowest bit of dtb_info is 1, data_start and data_size represent the address base + * and size of the DTB. 
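+ * For example (values illustrative only): dtb_info = 0x200000 with
+ * bit 0 clear describes a 2 MiB DTB, with data_start/data_size naming
+ * the MMIO window, while dtb_info = 0x1 makes data_start/data_size
+ * the base address and size of the DTB itself.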
+ */ + __u64 dtb_info; + __u64 data_start; + __u64 data_size; __u64 ram_size; struct kvm_numa_info numa_info; }; -- Gitee From eb8bb27c437d1118f7f37ce528c0ff8760a561b5 Mon Sep 17 00:00:00 2001 From: yxk Date: Sun, 6 Apr 2025 23:31:40 -0400 Subject: [PATCH 52/52] Modify openeuler_defconfig virtcca inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N -------------------------------- Modify openeuler_defconfig for virtcca Signed-off-by: hjx_gitff Signed-off-by: yxk Signed-off-by: Yiwei Zhuang --- arch/arm64/configs/openeuler_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8705b478ab7db..cdf8b09cdb8d7 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -602,7 +602,9 @@ CONFIG_DMI=y # end of Boot options CONFIG_COMPAT=y +CONFIG_HISI_VIRTCCA_HOST=y CONFIG_HISI_VIRTCCA_GUEST=y +CONFIG_HISI_VIRTCCA_CODA=y # # Power management options -- Gitee