diff --git a/0001-Support-RME-feature-for-CCA-host.patch b/0001-Support-RME-feature-for-CCA-host.patch new file mode 100644 index 0000000000000000000000000000000000000000..aeb73b4b25cad82bcaca8dfe0b71e11901d27db3 --- /dev/null +++ b/0001-Support-RME-feature-for-CCA-host.patch @@ -0,0 +1,8017 @@ +From ae7487085c4aff0a3222f02db48cf9bea27b1af8 Mon Sep 17 00:00:00 2001 +From: Xu Raoqing +Date: Sat, 14 Jun 2025 15:01:43 +0800 +Subject: [PATCH] Support RME feature for CCA host +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +virtcca inclusion +category: cleanup +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +This reverts commit 1182be89002c268a781bd90b25f18da76cfea732. + +Signed-off-by: Yang Xiangkai +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +Revert "VirtCCA: cvm support UEFI boot" + +virtcca inclusion +category: cleanup +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +This partially reverts commit 67773feaf933058b9e64a9089345b3c64f823acd. + +Signed-off-by: Yang Xiangkai +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +Revert "kvm: add virtcca cvm host feature" + +virtcca inclusion +category: cleanup +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +This partially reverts commit 67e11ee6347c43a97e8987b03a5b3534cd8095d9. 
+ +Signed-off-by: Yang Xiangkai +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +kvm: arm64: Include kvm_emulate.h in kvm/arm_psci.h + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Fix a potential build error (like below, when asm/kvm_emulate.h gets +included after the kvm/arm_psci.h) by including the missing header file +in kvm/arm_psci.h: + +./include/kvm/arm_psci.h: In function ‘kvm_psci_version’: +./include/kvm/arm_psci.h:29:13: error: implicit declaration of function + ‘vcpu_has_feature’; did you mean ‘cpu_have_feature’? [-Werror=implicit-function-declaration] + 29 | if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) { + | ^~~~~~~~~~~~~~~~ + | cpu_have_feature + +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: Handle Granule Protection Faults (GPFs) + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +If the host attempts to access granules that have been delegated for use +in a realm these accesses will be caught and will trigger a Granule +Protection Fault (GPF). + +A fault during a page walk signals a bug in the kernel and is handled by +oopsing the kernel. A non-page walk fault could be caused by user space +having access to a page which has been delegated to the kernel and will +trigger a SIGBUS to allow debugging why user space is trying to access a +delegated page. 
+ +Reviewed-by: Suzuki K Poulose +Reviewed-by: Gavin Shan +Signed-off-by: Steven Price + +arm64: RME: Add SMC definitions for calling the RMM + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM (Realm Management Monitor) provides functionality that can be +accessed by SMC calls from the host. + +The SMC definitions are based on DEN0137[1] version 1.0-rel0 + +[1] https://developer.arm.com/documentation/den0137/1-0rel0/ + +Reviewed-by: Gavin Shan +Signed-off-by: Steven Price + +arm64: RME: Add wrappers for RMI calls + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The wrappers make the call sites easier to read and deal with the +boiler plate of handling the error codes from the RMM. + +Reviewed-by: Gavin Shan +Signed-off-by: Steven Price +Reviewed-by: Suzuki K Poulose + +arm64: RME: Check for RME support at KVM init + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Query the RMI version number and check if it is a compatible version. A +static key is also provided to signal that a supported RMM is available. + +Functions are provided to query if a VM or VCPU is a realm (or rec) +which currently will always return false. + +Later patches make use of struct realm and the states as the ioctls +interfaces are added to support realm and REC creation and destruction. 
+ +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +arm64: RME: Define the user ABI + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +There is one (multiplexed) CAP which can be used to create, populate and +then activate the realm. + +Co-developed-by: Suzuki K Poulose +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +arm64: RME: ioctls to create and configure realms + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves +delegating pages to the RMM to hold the Realm Descriptor (RD) and for +the base level of the Realm Translation Tables (RTT). A VMID also needs +to be picked; since the RMM has a separate VMID address space, a +dedicated allocator is added for this purpose. + +KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm +before it is created. Configuration options can be classified as: + + 1. Parameters specific to the Realm stage2 (e.g. IPA Size, vmid, stage2 + entry level, entry level RTTs, number of RTTs in start level, LPA2) + Most of these are not measured by RMM and come from KVM book + keeping. + + 2. Parameters controlling "Arm Architecture features for the VM". (e.g. + SVE VL, PMU counters, number of HW BRPs/WPs), configured by the VMM + using the "user ID register write" mechanism. These will be + supported in the later patches. + + 3. Parameters that are not part of the core Arm architecture but defined + by the RMM spec (e.g. Hash algorithm for measurement, + Personalisation value). 
These are programmed via + KVM_CAP_ARM_RME_CONFIG_REALM. + +For the IPA size there is the possibility that the RMM supports a +different size to the IPA size supported by KVM for normal guests. At +the moment the 'normal limit' is exposed by KVM_CAP_ARM_VM_IPA_SIZE and +the IPA size is configured by the bottom bits of vm_type in +KVM_CREATE_VM. This means that it isn't easy for the VMM to discover +what IPA sizes are supported for Realm guests. Since the IPA is part of +the measurement of the realm guest the current expectation is that the +VMM will be required to pick the IPA size demanded by attestation and +therefore simply failing if this isn't available is fine. An option +would be to expose a new capability ioctl to obtain the RMM's maximum +IPA size if this is needed in the future. + +Co-developed-by: Suzuki K Poulose +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +kvm: arm64: Don't expose debug capabilities for realm guests + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +RMM v1.0 provides no mechanism for the host to perform debug operations +on the guest. So don't expose KVM_CAP_SET_GUEST_DEBUG and report 0 +breakpoints and 0 watch points. + +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +KVM: arm64: Allow passing machine type in KVM creation + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Previously machine type was used purely for specifying the physical +address size of the guest. 
Reserve the higher bits to specify an ARM +specific machine type and declare a new type 'KVM_VM_TYPE_ARM_REALM' +used to create a realm guest. + +Reviewed-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +arm64: RME: RTT tear down + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM owns the stage 2 page tables for a realm, and KVM must request +that the RMM creates/destroys entries as necessary. The physical pages +to store the page tables are delegated to the realm as required, and can +be undelegated when no longer used. + +Creating new RTTs is the easy part, tearing down is a little more +tricky. The result of realm_rtt_destroy() can be used to effectively +walk the tree and destroy the entries (undelegating pages that were +given to the realm). + +Signed-off-by: Steven Price +Reviewed-by: Suzuki K Poulose +Reviewed-by: Gavin Shan + +KVM: arm64: Add generic check for system-supported vCPU features + +mainline inclusion +from mainline-v6.7-rc1 +commit ef150908b6bd80a54126dbec324bd63a24a5628a +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ef150908b6bd80a54126dbec324bd63a24a5628a + +-------------------------------- + +[ Upstream commit ef150908b6bd80a54126dbec324bd63a24a5628a ] + +To date KVM has relied on kvm_reset_vcpu() failing when the vCPU feature +flags are unsupported by the system. This is a bit messy since +kvm_reset_vcpu() is called at runtime outside of the KVM_ARM_VCPU_INIT +ioctl when it is expected to succeed. Further complicating the matter is +that kvm_reset_vcpu() must be idempotent with respect to the config_lock, +as it isn't consistently called with the lock held. 
+ +Prepare to move feature compatibility checks out of kvm_reset_vcpu() with +a 'generic' check that compares the user-provided flags with a computed +maximum feature set for the system. + +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20230920195036.1169791-2-oliver.upton@linux.dev +Signed-off-by: Oliver Upton +Signed-off-by: Yiwei Zhuang +[xuraoqing:backport patch to check system vcpu support] +Signed-off-by: Xu Raoqing + +arm64: RME: Allocate/free RECs to match vCPUs + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM maintains a data structure known as the Realm Execution Context +(or REC). It is similar to struct kvm_vcpu and tracks the state of the +virtual CPUs. KVM must delegate memory and request the structures are +created when vCPUs are created, and suitably tear down on destruction. + +RECs must also be supplied with additional pages - auxiliary (or AUX) +granules - for storing the larger registers state (e.g. for SVE). The +number of AUX granules for a REC depends on the parameters with which +the Realm was created - the RMM makes this information available via the +RMI_REC_AUX_COUNT call performed after creating the Realm Descriptor (RD). + +Note that only some of the register state for the REC can be set by KVM, the +rest is defined by the RMM (zeroed). The register state then cannot be +changed by KVM after the REC is created (except when the guest +explicitly requests this e.g. by performing a PSCI call). The RMM also +requires that the VMM creates RECs in ascending order of the MPIDR. 
+ +See Realm Management Monitor specification (DEN0137) for more information: +https://developer.arm.com/documentation/den0137/ + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +KVM: arm64: vgic: Provide helper for number of list registers + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Currently the number of list registers available is stored in a global +(kvm_vgic_global_state.nr_lr). With Arm CCA the RMM is permitted to +reserve list registers for its own use and so the number of available +list registers can be fewer for a realm VM. Provide a wrapper function +to fetch the global in preparation for restricting nr_lr when dealing +with a realm VM. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +KVM: arm64: Force GICv3 trap activation when no irqchip is configured on VHE + +mainline inclusion +from mainline-v6.12-rc1 +commit 8d917e0a8651377321c06513f42e2ab9a86161f4 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8d917e0a8651377321c06513f42e2ab9a86161f4 + +-------------------------------- + +[ Upstream commit 8d917e0a8651377321c06513f42e2ab9a86161f4 ] + +On a VHE system, no GICv3 traps get configured when no irqchip is +present. This is not quite matching the "no GICv3" semantics that +we want to present. + +Force such traps to be configured in this case. 
+ +Reviewed-by: Oliver Upton +Link: https://lore.kernel.org/r/20240827152517.3909653-4-maz@kernel.org +Signed-off-by: Marc Zyngier +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: Support for the VGIC in realms + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM provides emulation of a VGIC to the realm guest but delegates +much of the handling to the host. Implement support in KVM for +saving/restoring state to/from the REC structure. + +Signed-off-by: Steven Price + +KVM: arm64: Support timers in realm RECs + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM keeps track of the timer while the realm REC is running, but on +exit to the normal world KVM is responsible for handling the timers. + +The RMM doesn't provide a mechanism to set the counter offset, so don't +expose KVM_CAP_COUNTER_OFFSET for a realm VM. + +A later patch adds the support for propagating the timer values from the +exit data structure and calling kvm_realm_timers_update(). + +Signed-off-by: Steven Price +Reviewed-by: Suzuki K Poulose + +KVM: Add member to struct kvm_gfn_range to indicate private/shared + +mainline inclusion +from mainline-v6.14-rc1 +commit dca6c88532322830d5d92486467fcc91b67a9ad8 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dca6c88532322830d5d92486467fcc91b67a9ad8 + +-------------------------------- + +Add new members to struct kvm_gfn_range to indicate which mapping +(private-vs-shared) to operate on: enum kvm_gfn_range_filter +attr_filter. 
Update the core zapping operations to set them appropriately. + +TDX utilizes two GPA aliases for the same memslots, one for memory that is +for private memory and one that is for shared. For private memory, KVM +cannot always perform the same operations it does on memory for default +VMs, such as zapping pages and having them be faulted back in, as this +requires guest coordination. However, some operations such as guest driven +conversion of memory between private and shared should zap private memory. + +Internally to the MMU, private and shared mappings are tracked on separate +roots. Mapping and zapping operations will operate on the respective GFN +alias for each root (private or shared). So zapping operations will by +default zap both aliases. Add fields in struct kvm_gfn_range to allow +callers to specify which aliases so they can only target the aliases +appropriate for their specific operation. + +There was feedback that target aliases should be specified such that the +default value (0) is to operate on both aliases. Several options were +considered. Several variations of having separate bools defined such +that the default behavior was to process both aliases. They either allowed +nonsensical configurations, or were confusing for the caller. A simple +enum was also explored and was close, but was hard to process in the +caller. Instead, use an enum with the default value (0) reserved as a +disallowed value. Catch ranges that didn't have the target aliases +specified by looking for that specific value. + +Set target alias with enum appropriately for these MMU operations: + - For KVM's mmu notifier callbacks, zap shared pages only because private + pages won't have a userspace mapping + - For setting memory attributes, kvm_arch_pre_set_memory_attributes() + chooses the aliases based on the attribute. + - For guest_memfd invalidations, zap private only. 
+ +Link: https://lore.kernel.org/kvm/ZivIF9vjKcuGie3s@google.com/ +Signed-off-by: Isaku Yamahata +Co-developed-by: Rick Edgecombe +Signed-off-by: Rick Edgecombe +Message-ID: <20240718211230.1492011-3-rick.p.edgecombe@intel.com> +Signed-off-by: Paolo Bonzini + +Conflicts:virt/kvm/kvm_main.c,arch/x86/kvm/mmu/mmu.c +Signed-off-by: Xu Raoqing + +arm64: RME: Allow VMM to set RIPAS + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Each page within the protected region of the realm guest can be marked +as either RAM or EMPTY. Allow the VMM to control this before the guest +has started and provide the equivalent functions to change this (with +the guest's approval) at runtime. + +When transitioning from RIPAS RAM (1) to RIPAS EMPTY (0) the memory is +unmapped from the guest and undelegated allowing the memory to be reused +by the host. When transitioning to RIPAS RAM the actual population of +the leaf RTTs is done later on stage 2 fault, however it may be +necessary to allocate additional RTTs to allow the RMM to track the RIPAS +for the requested range. + +When freeing a block mapping it is necessary to temporarily unfold the +RTT which requires delegating an extra page to the RMM, this page can +then be recovered once the contents of the block mapping have been +freed. + +Signed-off-by: Steven Price + +arm64: RME: Handle realm enter/exit + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Entering a realm is done using a SMC call to the RMM. On exit the +exit-codes need to be handled slightly differently to the normal KVM +path so define our own functions for realm enter/exit and hook them +in if the guest is a realm guest. 
+ +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +arm64: RME: Handle RMI_EXIT_RIPAS_CHANGE + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The guest can request that a region of its protected address space is +switched between RIPAS_RAM and RIPAS_EMPTY (and back) using +RSI_IPA_STATE_SET. This causes a guest exit with the +RMI_EXIT_RIPAS_CHANGE code. We treat this as a request to convert a +protected region to unprotected (or back), exiting to the VMM to make +the necessary changes to the guest_memfd and memslot mappings. On the +next entry the RIPAS changes are committed by making RMI_RTT_SET_RIPAS +calls. + +The VMM may wish to reject the RIPAS change requested by the guest. For +now it can only do this by no longer scheduling the VCPU as we don't +currently have a usecase for returning that rejection to the guest, but +by postponing the RMI_RTT_SET_RIPAS changes to entry we leave the door +open for adding a new ioctl in the future for this purpose. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +KVM: arm64: Handle realm MMIO emulation + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +MMIO emulation for a realm cannot be done directly with the VM's +registers as they are protected from the host. However, for emulatable +data aborts, the RMM uses GPRS[0] to provide the read/written value. +We can transfer this from/to the equivalent VCPU's register entry and +then depend on the generic MMIO handling code in KVM. + +For a MMIO read, the value is placed in the shared RecExit structure +during kvm_handle_mmio_return() rather than in the VCPU's register +entry. 
+ +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +arm64: RME: Allow populating initial contents + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The VMM needs to populate the realm with some data before starting (e.g. +a kernel and initrd). This is measured by the RMM and used as part of +the attestation later on. + +Co-developed-by: Suzuki K Poulose +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +rme: populate guest memory region without guest_memfd + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://patchew.org/linux/20241004152804.72508-1-steven.price@arm.com/20241004152804.72508-21-steven.price@arm.com/ + +-------------------------------- + +The VMM needs to populate the realm with some data before starting (e.g. +a kernel and initrd). This is measured by the RMM and used as part of +the attestation later on. + +For now only 4k mappings are supported, future work may add support for +larger mappings. + +Co-developed-by: Suzuki K Poulose +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price + +KVM: arm64: Move pagetable definitions to common header + +mainline inclusion +from mainline-v6.12-rc1 +commit 29caeda359da15d16963096043cda39530f81cc4 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=29caeda359da15d16963096043cda39530f81cc4 + +-------------------------------- + +In preparation for using the stage-2 definitions in ptdump, move some of +these macros in the common header. 
+ +Signed-off-by: Sebastian Ene +Link: https://lore.kernel.org/r/20240909124721.1672199-2-sebastianene@google.com +Signed-off-by: Marc Zyngier +[xuraoqing: only need macro KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R and KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W] +Signed-off-by: Xu Raoqing + +arm64: RME: Runtime faulting of memory + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +At runtime if the realm guest accesses memory which hasn't yet been +mapped then KVM needs to either populate the region or fault the guest. + +For memory in the lower (protected) region of IPA a fresh page is +provided to the RMM which will zero the contents. For memory in the +upper (shared) region of IPA, the memory from the memslot is mapped +into the realm VM non secure. + +Signed-off-by: Steven Price + +KVM: arm64: Handle realm VCPU load + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +When loading a realm VCPU much of the work is handled by the RMM so only +some of the actions are required. Rearrange kvm_arch_vcpu_load() +slightly so we can bail out early for a realm guest. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +Conflicts:arch/arm64/kvm/arm.c +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +KVM: arm64: Validate register access for a Realm VM + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM only allows setting the GPRS (x0-x30) and PC for a realm +guest. 
Check this in kvm_arm_set_reg() so that the VMM can receive a +suitable error return if other registers are written to. + +The RMM makes similar restrictions for reading of the guest's registers +(this is *confidential* compute after all), however we don't impose the +restriction here. This allows the VMM to read (stale) values from the +registers which might be useful to read back the initial values even if +the RMM doesn't provide the latest version. For migration of a realm VM, +a new interface will be needed so that the VMM can receive an +(encrypted) blob of the VM's state. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +KVM: arm64: Handle Realm PSCI requests + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM needs to be informed of the target REC when a PSCI call is made +with an MPIDR argument. Expose an ioctl to the userspace in case the PSCI +is handled by it. + +Co-developed-by: Suzuki K Poulose +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +KVM: arm64: WARN on injected undef exceptions + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM doesn't allow injection of a undefined exception into a realm +guest. Add a WARN to catch if this ever happens. 
+ +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +arm64: Don't expose stolen time for realm guests + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +It doesn't make much sense as a realm guest wouldn't want to trust the +host. It will also need some extra work to ensure that KVM will only +attempt to write into a shared memory region. So for now just disable +it. + +Reviewed-by: Suzuki K Poulose +Reviewed-by: Gavin Shan +Signed-off-by: Steven Price + +arm64: RME: allow userspace to inject aborts + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Extend KVM_SET_VCPU_EVENTS to support realms, where KVM cannot set the +system registers, and the RMM must perform it on next REC entry. + +Signed-off-by: Joey Gouly +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: support RSI_HOST_CALL + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +From: Joey Gouly + +Forward RSI_HOST_CALLS to KVM's HVC handler. 
+ +Signed-off-by: Joey Gouly +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +arm64: RME: Allow checking SVE on VM instance + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Given we have different types of VMs supported, check the +support for SVE for the given instance of the VM to accurately +report the status. + +Signed-off-by: Suzuki K Poulose +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: Always use 4k pages for realms + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Guest_memfd doesn't yet natively support huge pages, and there are +currently difficulties for a VMM to manage huge pages efficiently so for +now always split up mappings to PTE (4k). + +The two issues that need progressing before supporting huge pages for +realms are: + + 1. guest_memfd needs to be able to allocate from an appropriate + allocator which can provide huge pages. + + 2. The VMM needs to be able to repurpose private memory for a shared + mapping when the guest VM requests memory is transitioned. Because + this can happen at a 4k granularity it isn't possible to + free/reallocate while huge pages are in use. Allowing the VMM to + mmap() the shared portion of a huge page would allow the huge page + to be recreated when the memory is unshared and made protected again. + +These two issues are not specific to realms and don't affect the realm +API, so for now just break everything down to 4k pages in the RMM +controlled stage 2. Future work can add huge page support without +changing the uAPI. 
+ +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +arm64: RME: Prevent Device mappings for Realms + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Physical device assignment is not yet supported by the RMM, so it +doesn't make much sense to allow device mappings within the realm. +Prevent them when the guest is a realm. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +arm_pmu: Provide a mechanism for disabling the physical IRQ + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Arm CCA assigns the physical PMU device to the guest running in realm +world, however the IRQs are routed via the host. To enter a realm guest +while a PMU IRQ is pending it is necessary to block the physical IRQ to +prevent an immediate exit. Provide a mechanism in the PMU driver for KVM +to control the physical IRQ. + +Signed-off-by: Steven Price + +KVM: arm64: PMU: Set PMCR_EL0.N for vCPU based on the associated PMU + +mainline inclusion +from mainline-v6.7-rc1 +commit 4d20debf9ca160720a0b01ba4f2dc3d62296c4d1 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4d20debf9ca160720a0b01ba4f2dc3d62296c4d1 + +-------------------------------- + +The number of PMU event counters is indicated in PMCR_EL0.N. +For a vCPU with PMUv3 configured, the value is set to the same +value as the current PE on every vCPU reset. 
Unless the vCPU is +pinned to PEs that has the PMU associated to the guest from the +initial vCPU reset, the value might be different from the PMU's +PMCR_EL0.N on heterogeneous PMU systems. + +Fix this by setting the vCPU's PMCR_EL0.N to the PMU's PMCR_EL0.N +value. Track the PMCR_EL0.N per guest, as only one PMU can be set +for the guest (PMCR_EL0.N must be the same for all vCPUs of the +guest), and it is convenient for updating the value. + +To achieve this, the patch introduces a helper, +kvm_arm_pmu_get_max_counters(), that reads the maximum number of +counters from the arm_pmu associated to the VM. Make the function +global as upcoming patches will be interested to know the value +while setting the PMCR.N of the guest from userspace. + +KVM does not yet support userspace modifying PMCR_EL0.N. +The following patch will add support for that. + +Reviewed-by: Sebastian Ott +Co-developed-by: Marc Zyngier +Signed-off-by: Marc Zyngier +Signed-off-by: Reiji Watanabe +Signed-off-by: Raghavendra Rao Ananta +Link: https://lore.kernel.org/r/20231020214053.2144305-5-rananta@google.com +Signed-off-by: Oliver Upton +Conflicts:arch/arm64/kvm/pmu-emul.c +Signed-off-by: Xu Raoqing + +KVM: arm64: PMU: Allow userspace to limit PMCR_EL0.N for the guest + +mainline inclusion +from mainline-v6.7-rc1 +commit ea9ca904d24ff15ded92fd76c16462c47bcae2f8 +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ea9ca904d24ff15ded92fd76c16462c47bcae2f8 + +-------------------------------- + +KVM does not yet support userspace modifying PMCR_EL0.N (With +the previous patch, KVM ignores what is written by userspace). +Add support userspace limiting PMCR_EL0.N. + +Disallow userspace to set PMCR_EL0.N to a value that is greater +than the host value as KVM doesn't support more event counters +than what the host HW implements. 
Also, make this register +immutable after the VM has started running. To maintain the +existing expectations, instead of returning an error, KVM +returns a success for these two cases. + +Finally, ignore writes to read-only bits that are cleared on +vCPU reset, and RES{0,1} bits (including writable bits that +KVM doesn't support yet), as those bits shouldn't be modified +(at least with the current KVM). + +Co-developed-by: Marc Zyngier +Signed-off-by: Marc Zyngier +Signed-off-by: Reiji Watanabe +Signed-off-by: Raghavendra Rao Ananta +Signed-off-by: Oliver Upton + +Conflicts:arch/arm64/kvm/sys_regs.c +Signed-off-by: Xu Raoqing + +KVM: arm64: PMU: Introduce helpers to set the guest's PMU + +mainline inclusion +from mainline-v6.7-rc1 +commit 1616ca6f3c10723c1b60ae44724212fae88f502d +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1616ca6f3c10723c1b60ae44724212fae88f502d + +-------------------------------- + +Introduce new helper functions to set the guest's PMU +(kvm->arch.arm_pmu) either to a default probed instance or to a +caller requested one, and use it when the guest's PMU needs to +be set. These helpers will make it easier for the following +patches to modify the relevant code. + +No functional change intended. 
+ +Reviewed-by: Sebastian Ott +Signed-off-by: Reiji Watanabe +Signed-off-by: Raghavendra Rao Ananta +Reviewed-by: Eric Auger +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20231020214053.2144305-2-rananta@google.com +Signed-off-by: Oliver Upton +[arch/arm64/kvm/pmu-emul.c: remove duplicate kvm_arm_set_pmu] +Signed-off-by: Xu Raoqing + +arm64: rme: Enable PMU support with a realm guest + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Use the PMU registers from the RmiRecExit structure to identify when an +overflow interrupt is due and inject it into the guest. Also hook up the +configuration option for enabling the PMU within the guest. + +When entering a realm guest with a PMU interrupt pending, it is +necessary to disable the physical interrupt. Otherwise when the RMM +restores the PMU state the physical interrupt will trigger causing an +immediate exit back to the host. The guest is expected to acknowledge +the interrupt causing a host exit (to update the GIC state) which gives +the opportunity to re-enable the physical interrupt before the next PMU +event. + +Number of PMU counters is configured by the VMM by writing to PMCR.N. + +Signed-off-by: Steven Price + +arm64: RME: Hide KVM_CAP_READONLY_MEM for realm guests + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +For protected memory read only isn't supported by the RMM. While it may +be possible to support read only for unprotected memory, this isn't +supported at the present time. 
+ +Note that this does mean that ROM (or flash) data cannot be emulated +correctly by the VMM as the stage 2 mappings are either always +read/write or are trapped as MMIO (so don't support operations where the +syndrome information doesn't allow emulation, e.g. load/store pair). + +This restriction can be lifted in the future by allowing the stage 2 +mappings to be made read only. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +arm64: RME: Propagate number of breakpoints and watchpoints to userspace + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +The RMM describes the maximum number of BPs/WPs available to the guest +in the Feature Register 0. Propagate those numbers into ID_AA64DFR0_EL1, +which is visible to userspace. A VMM needs this information in order to +set up realm parameters. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: Set breakpoint parameters through SET_ONE_REG + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://patchwork.kernel.org/project/kvm/patch/20250416134208.383984-36-steven.price@arm.com/ + +-------------------------------- + +Allow userspace to configure the number of breakpoints and watchpoints +of a Realm VM through KVM_SET_ONE_REG ID_AA64DFR0_EL1. + +The KVM sys_reg handler checks the user value against the maximum value +given by RMM (arm64_check_features() gets it from the +read_sanitised_id_aa64dfr0_el1() reset handler). + +Userspace discovers that it can write these fields by issuing a +KVM_ARM_GET_REG_WRITABLE_MASKS ioctl. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Conflicts:arch/arm64/kvm/rme.c +Signed-off-by: Xu Raoqing + +arm64: RME: Initialize PMCR.N with number counter supported by RMM + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://patchwork.kernel.org/project/kvm/patch/20250416134208.383984-37-steven.price@arm.com/ + +-------------------------------- + +Provide an accurate number of available PMU counters to userspace when +setting up a Realm. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price + +Signed-off-by: Xu Raoqing + +arm64: RME: Propagate max SVE vector length from RMM + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +RMM provides the maximum vector length it supports for a guest in its +feature register. Make it visible to the rest of KVM and to userspace +via KVM_REG_ARM64_SVE_VLS. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: Configure max SVE vector length for a Realm + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Obtain the max vector length configured by userspace on the vCPUs, and +write it into the Realm parameters. By default the vCPU is configured +with the max vector length reported by RMM, and userspace can reduce it +with a write to KVM_REG_ARM64_SVE_VLS. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +arm64: RME: Provide register list for unfinalized RME RECs + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +KVM_GET_REG_LIST should not be called before SVE is finalized. The ioctl +handler currently returns -EPERM in this case. But because it uses +kvm_arm_vcpu_is_finalized(), it now also rejects the call for +unfinalized REC even though finalizing the REC can only be done late, +after Realm descriptor creation. + +Move the check to copy_sve_reg_indices(). One adverse side effect of +this change is that a KVM_GET_REG_LIST call that only probes for the +array size will now succeed even if SVE is not finalized, but that seems +harmless since the following KVM_GET_REG_LIST with the full array will +fail. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +arm64: RME: Provide accurate register list + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Userspace can set a few registers with KVM_SET_ONE_REG (9 GP registers +at runtime, and 3 system registers during initialization). Update the +register list returned by KVM_GET_REG_LIST. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +KVM: arm64: Expose KVM_ARM_VCPU_REC to user space + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Increment KVM_VCPU_MAX_FEATURES to expose the new capability to user +space. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan + +KVM: arm64: Allow activating realms + +community inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N +Reference: https://lore.kernel.org/kvm/20250416134208.383984-1-steven.price@arm.com/T/ + +-------------------------------- + +Add the ioctl to activate a realm and set the static branch to enable +access to the realm functionality if the RMM is detected. + +Signed-off-by: Steven Price +Reviewed-by: Gavin Shan +Reviewed-by: Suzuki K Poulose + +Signed-off-by: Yiwei Zhuang +Signed-off-by: Xu Raoqing + +VirtCCA: Add CCA base operations. + +virtcca inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +Add common code for CCA and VirtCCA. We Refactor VirtCCA to +reuse CCA code where possible, differentiated by `cca_cvm_type` + +Signed-off-by: Yang Xiangkai +Signed-off-by: He Jingxian +Signed-off-by: Xu Raoqing + +VirtCCA: Adapt realm operations by cca_base. + +virtcca inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +Refactor CCA implementation to adopt shared code, ensuring +CCA module utilizes cca_base with minimal modification. + +Signed-off-by: Yang Xiangkai +Signed-off-by: He Jingxian +Signed-off-by: Xu Raoqing + +VirtCCA: Adapt virtcca operations by cca_base. 
+ +virtcca inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +Refactor the code of virtCCA to utilize the code from cca_base as much +as possible. Let virtCCA and CCA share the common components vcpu_is_rec +and kvm_is_realm, while retaining vcpu_is_tec as a virtCCA-specific +identifier to distinguish scenarios where code sharing is +not feasible. + +Signed-off-by: He Jingxian +Signed-off-by: Yang Xiangkai +Signed-off-by: Xu Raoqing + +Reapply "VirtCCA: cvm support UEFI boot" + +virtcca inclusion +category: clean +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +This reverts commit 8ea38c27b1aa13fb351254713d91f62bd4c44b5d. + +Signed-off-by: Yang Xiangkai +Signed-off-by: He Jingxian +Signed-off-by: Xu Raoqing + +VirtCCA: Modify openeuler_defconfig + +virtcca inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +Modify openeuler_defconfig for virtcca + +Signed-off-by: Yang Xiangkai +Signed-off-by: He Jingxian +Signed-off-by: Xu Raoqing + +CCA: Fix cca kabi conflict + +cca inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +The new feature ARM CCA adds member variables to +struct kvm_arch and kvm_vcpu_arch, which causes kabi changes. +Now reuse virtcca's variable memory fix changes. + +Signed-off-by: Hou Mingyong +Signed-off-by: Xu Raoqing + +VirtCCA: Compatible with virtcca macro definitions + +virtcca compatible inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBY08N + +-------------------------------- + +To be compatible with openEuler self-developed virtcca +1. vcpu_is_rec() for virtCCA or CCA, _vcpu_is_rec for CCA only. +2. virtCCA and CCA share the macro KVM_ARM_VCPU_REC and reuse the value 8 of virtCCA, +replacing the community CCA macro value 9. 
So the macro KVM_VCPU_MAX_FEATURES of community +also needs to be reduced by 1. +3. Similarly, KVM_CAP_ARM_RME also reuses the value 300 of virtCCA. + +Signed-off-by: Hou Mingyong +Signed-off-by: Yang Xiangkai +Signed-off-by: Xu Raoqing +--- + Documentation/virt/kvm/api.rst | 91 +- + arch/arm64/configs/openeuler_defconfig | 4 +- + arch/arm64/include/asm/cca_base.h | 60 + + arch/arm64/include/asm/cca_type.h | 14 + + arch/arm64/include/asm/kvm_emulate.h | 58 +- + arch/arm64/include/asm/kvm_host.h | 69 +- + arch/arm64/include/asm/kvm_pgtable.h | 9 + + arch/arm64/include/asm/kvm_rme.h | 137 ++ + arch/arm64/include/asm/kvm_tmi.h | 4 +- + arch/arm64/include/asm/kvm_tmm.h | 44 +- + arch/arm64/include/asm/rmi_cmds.h | 508 ++++++ + arch/arm64/include/asm/rmi_smc.h | 259 ++++ + arch/arm64/include/asm/virt.h | 1 + + arch/arm64/include/asm/virtcca_cvm_host.h | 8 +- + arch/arm64/include/uapi/asm/kvm.h | 50 +- + arch/arm64/kernel/virtcca_cvm_host.c | 4 +- + arch/arm64/kvm/Makefile | 3 +- + arch/arm64/kvm/arch_timer.c | 49 +- + arch/arm64/kvm/arm.c | 286 ++-- + arch/arm64/kvm/cca_base.c | 123 ++ + arch/arm64/kvm/guest.c | 104 +- + arch/arm64/kvm/hypercalls.c | 4 +- + arch/arm64/kvm/inject_fault.c | 5 +- + arch/arm64/kvm/mmio.c | 34 +- + arch/arm64/kvm/mmu.c | 163 +- + arch/arm64/kvm/pmu-emul.c | 85 +- + arch/arm64/kvm/psci.c | 40 +- + arch/arm64/kvm/reset.c | 34 +- + arch/arm64/kvm/rme-exit.c | 191 +++ + arch/arm64/kvm/rme.c | 1723 +++++++++++++++++++++ + arch/arm64/kvm/sys_regs.c | 151 +- + arch/arm64/kvm/vgic/vgic-init.c | 2 +- + arch/arm64/kvm/vgic/vgic-v3.c | 15 +- + arch/arm64/kvm/vgic/vgic.c | 111 +- + arch/arm64/kvm/virtcca_cvm.c | 93 +- + arch/arm64/kvm/virtcca_cvm_exit.c | 65 +- + arch/arm64/mm/fault.c | 31 +- + drivers/perf/arm_pmu.c | 2 - + include/kvm/arm_arch_timer.h | 2 + + include/kvm/arm_pmu.h | 10 + + include/kvm/arm_psci.h | 2 + + include/linux/kvm_host.h | 24 +- + include/linux/perf/arm_pmu.h | 11 +- + include/uapi/linux/kvm.h | 29 +- + virt/kvm/kvm_main.c | 5 
+ + 45 files changed, 4250 insertions(+), 467 deletions(-) + create mode 100644 arch/arm64/include/asm/cca_base.h + create mode 100644 arch/arm64/include/asm/cca_type.h + create mode 100644 arch/arm64/include/asm/kvm_rme.h + create mode 100644 arch/arm64/include/asm/rmi_cmds.h + create mode 100644 arch/arm64/include/asm/rmi_smc.h + create mode 100644 arch/arm64/kvm/cca_base.c + create mode 100644 arch/arm64/kvm/rme-exit.c + create mode 100644 arch/arm64/kvm/rme.c + +diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst +index 17811002a8f7..c40b6a09ffd8 100644 +--- a/Documentation/virt/kvm/api.rst ++++ b/Documentation/virt/kvm/api.rst +@@ -151,8 +151,20 @@ In order to create user controlled virtual machines on S390, check + KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as + privileged user (CAP_SYS_ADMIN). + +-On arm64, the physical address size for a VM (IPA Size limit) is limited +-to 40bits by default. The limit can be configured if the host supports the ++On arm64, the machine type identifier is used to encode a type and the ++physical address size for the VM. The lower byte (bits[7-0]) encode the ++address size and the upper bits[11-8] encode a machine type. The machine ++types that might be available are: ++ ++ ====================== ============================================ ++ KVM_VM_TYPE_ARM_NORMAL A standard VM ++ KVM_VM_TYPE_ARM_REALM A "Realm" VM using the Arm Confidential ++ Compute extensions, the VM's memory is ++ protected from the host. ++ ====================== ============================================ ++ ++The physical address size for a VM (IPA Size limit) is limited to 40bits ++by default. The limit can be configured if the host supports the + extension KVM_CAP_ARM_VM_IPA_SIZE. 
When supported, use + KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type + identifier, where IPA_Bits is the maximum width of any physical +@@ -1271,6 +1283,8 @@ User space may need to inject several types of events to the guest. + Set the pending SError exception state for this VCPU. It is not possible to + 'cancel' an Serror that has been made pending. + ++User space cannot inject SErrors into Realms. ++ + If the guest performed an access to I/O memory which could not be handled by + userspace, for example because of missing instruction syndrome decode + information or because there is no device mapped at the accessed IPA, then +@@ -3526,6 +3540,11 @@ Possible features: + - the KVM_REG_ARM64_SVE_VLS pseudo-register is immutable, and can + no longer be written using KVM_SET_ONE_REG. + ++ - KVM_ARM_VCPU_REC: Allocate a REC (Realm Execution Context) for this ++ VCPU. This must be specified on all VCPUs created in a Realm VM. ++ Depends on KVM_CAP_ARM_RME. ++ Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_REC). ++ + 4.83 KVM_ARM_PREFERRED_TARGET + ----------------------------- + +@@ -5061,6 +5080,7 @@ Recognised values for feature: + + ===== =========================================== + arm64 KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE) ++ arm64 KVM_ARM_VCPU_REC (requires KVM_CAP_ARM_RME) + ===== =========================================== + + Finalizes the configuration of the specified vcpu feature. +@@ -6320,6 +6340,30 @@ to the byte array. 
+ __u64 flags; + } hypercall; + ++4.144 KVM_ARM_VCPU_RMM_PSCI_COMPLETE ++------------------------------------ ++ ++:Capability: KVM_CAP_ARM_RME ++:Architectures: arm64 ++:Type: vcpu ioctl ++:Parameters: struct kvm_arm_rmm_psci_complete (in) ++:Returns: 0 if successful, < 0 on error ++ ++:: ++ ++ struct kvm_arm_rmm_psci_complete { ++ __u64 target_mpidr; ++ __u32 psci_status; ++ __u32 padding[3]; ++ }; ++ ++Where PSCI functions are handled by user space, the RMM needs to be informed of ++the target of the operation using `target_mpidr`, along with the status ++(`psci_status`). The RMM v1.0 specification defines two functions that require ++this call: PSCI_CPU_ON and PSCI_AFFINITY_INFO. ++ ++If the kernel is handling PSCI then this is done automatically and the VMM ++doesn't need to call this ioctl. + + It is strongly recommended that userspace use ``KVM_EXIT_IO`` (x86) or + ``KVM_EXIT_MMIO`` (all except s390) to implement functionality that +@@ -7794,6 +7838,46 @@ This capability is aimed to mitigate the threat that malicious VMs can + cause CPU stuck (due to event windows don't open up) and make the CPU + unavailable to host or other VMs. + ++7.38 KVM_CAP_ARM_RME ++-------------------- ++ ++:Architectures: arm64 ++:Target: VM ++:Parameters: args[0] provides an action, args[1] points to a structure in ++ memory for some actions. ++:Returns: 0 on success, negative value on error ++ ++Used to configure and set up the memory for a Realm. The available actions are: ++ ++================================= ============================================= ++ KVM_CAP_ARM_RME_CONFIG_REALM Takes struct arm_rme_config as args[1] and ++ configures realm parameters prior to it being ++ created. ++ ++ Options are ARM_RME_CONFIG_RPV to set the ++ "Realm Personalization Value" and ++ ARM_RME_CONFIG_HASH_ALGO to set the hash ++ algorithm. ++ ++ KVM_CAP_ARM_RME_CREATE_REALM Request the RMM create the realm. The realm's ++ configuration parameters must be set first. 
++ ++ KVM_CAP_ARM_RME_INIT_RIPAS_REALM Takes struct arm_rme_init_ripas as args[1] ++ and sets the RIPAS (Realm IPA State) to ++ RIPAS_RAM of a specified area of the realm's ++ IPA. ++ ++ KVM_CAP_ARM_RME_POPULATE_REALM Takes struct arm_rme_init_ripas as args[1] ++ and populates a region of protected address ++ space by copying the data from the shared ++ alias. ++ ++ KVM_CAP_ARM_RME_ACTIVATE_REALM Request the RMM activate the realm. No ++ further changes can be made to the realm's ++ configuration, and VCPUs are not permitted to ++ enter the realm until it has been activated. ++================================= ============================================= ++ + 8. Other capabilities. + ====================== + +@@ -8116,6 +8200,9 @@ is supported, than the other should as well and vice versa. For arm64 + see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL". + For x86 see Documentation/virt/kvm/x86/msr.rst "MSR_KVM_STEAL_TIME". + ++Note that steal time accounting is not available when a guest is running ++within a Arm CCA realm (machine type KVM_VM_TYPE_ARM_REALM). 
++ + 8.25 KVM_CAP_S390_DIAG318 + ------------------------- + +diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig +index 3cfff0701479..9d4916be1b15 100644 +--- a/arch/arm64/configs/openeuler_defconfig ++++ b/arch/arm64/configs/openeuler_defconfig +@@ -618,7 +618,9 @@ CONFIG_DMI=y + # end of Boot options + + CONFIG_COMPAT=y ++CONFIG_HISI_VIRTCCA_HOST=y + CONFIG_HISI_VIRTCCA_GUEST=y ++CONFIG_HISI_VIRTCCA_CODA=y + + # + # Power management options +@@ -785,8 +787,6 @@ CONFIG_KVM_GENERIC_HARDWARE_ENABLING=y + CONFIG_KVM_HISI_VIRT=y + CONFIG_VIRTUALIZATION=y + CONFIG_KVM=y +-CONFIG_HISI_VIRTCCA_HOST=y +-CONFIG_HISI_VIRTCCA_CODA=y + # CONFIG_NVHE_EL2_DEBUG is not set + CONFIG_KVM_ARM_MULTI_LPI_TRANSLATE_CACHE=y + CONFIG_ARCH_VCPU_STAT=y +diff --git a/arch/arm64/include/asm/cca_base.h b/arch/arm64/include/asm/cca_base.h +new file mode 100644 +index 000000000000..22dbf2e68738 +--- /dev/null ++++ b/arch/arm64/include/asm/cca_base.h +@@ -0,0 +1,60 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
++ */ ++#ifndef __CCA_BASE_H ++#define __CCA_BASE_H ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++struct cca_operations { ++ int (*enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap); ++ int (*init_realm_vm)(struct kvm *kvm); ++ int (*realm_vm_enter)(struct kvm_vcpu *vcpu); ++ int (*realm_vm_exit)(struct kvm_vcpu *vcpu, int ret); ++ void (*init_sel2_hypervisor)(void); ++ int (*psci_complete)(struct kvm_vcpu *calling, struct kvm_vcpu *target, ++ unsigned long status); ++ int (*create_vcpu)(struct kvm_vcpu *vcpu); ++ void (*destroy_vcpu)(struct kvm_vcpu *vcpu); ++ void (*destroy_vm)(struct kvm *kvm); ++ int (*enable_realm)(struct kvm *kvm); ++ u32 (*vgic_nr_lr)(void); ++} ____cacheline_aligned; ++ ++struct cca_share_pages_operations { ++ int (*alloc_shared_pages)(int p1, gfp_t p2, unsigned int p3); ++ void (*free_shared_pages)(void *p1, unsigned int p2); ++} ____cacheline_aligned; ++ ++int __init cca_operations_register(enum cca_cvm_type type, struct cca_operations *ops); ++int __init cca_share_pages_ops_register(enum cca_cvm_type type, ++ struct cca_share_pages_operations *ops); ++ ++int kvm_get_cvm_type(void); ++ ++int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); ++void kvm_init_rme(void); ++ ++int kvm_rec_enter(struct kvm_vcpu *vcpu); ++int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret); ++ ++int kvm_init_realm_vm(struct kvm *kvm); ++void kvm_destroy_realm(struct kvm *kvm); ++ ++int kvm_create_rec(struct kvm_vcpu *vcpu); ++void kvm_destroy_rec(struct kvm_vcpu *vcpu); ++ ++int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status); ++ ++u32 kvm_realm_vgic_nr_lr(void); ++ ++#endif /* __CCA_BASE_H */ +diff --git a/arch/arm64/include/asm/cca_type.h b/arch/arm64/include/asm/cca_type.h +new file mode 100644 +index 000000000000..480e8e266af4 +--- /dev/null ++++ b/arch/arm64/include/asm/cca_type.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: 
GPL-2.0-only */ ++/* ++ * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved. ++ */ ++#ifndef __CCA_TYPE_H ++#define __CCA_TYPE_H ++ ++enum cca_cvm_type { ++ ARMCCA_CVM, ++ VIRTCCA_CVM, ++ CCA_CVM_MAX, ++}; ++ ++#endif /* __CCA_TYPE_H */ +diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h +index f0b10cb2c87d..c1353ad0bcf9 100644 +--- a/arch/arm64/include/asm/kvm_emulate.h ++++ b/arch/arm64/include/asm/kvm_emulate.h +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #define CURRENT_EL_SP_EL0_VECTOR 0x0 + #define CURRENT_EL_SP_ELx_VECTOR 0x200 +@@ -643,14 +644,62 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) + kvm_write_cptr_el2(val); + } + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +-static inline bool kvm_is_virtcca_cvm(struct kvm *kvm) ++/* kvm of virtCCA or CCA */ ++static inline bool kvm_is_realm(struct kvm *kvm) ++{ ++ if (static_branch_unlikely(&kvm_rme_is_available) && kvm) ++ return kvm->arch.is_realm; ++ return false; ++} ++ ++/* kvm of CCA only */ ++static inline bool _kvm_is_realm(struct kvm *kvm) ++{ ++ return kvm_is_realm(kvm) && (kvm_get_cvm_type() == ARMCCA_CVM); ++} ++ ++static inline enum realm_state kvm_realm_state(struct kvm *kvm) ++{ ++ return READ_ONCE(kvm->arch.realm.state); ++} ++ ++static inline bool kvm_realm_is_created(struct kvm *kvm) ++{ ++ return kvm_is_realm(kvm) && kvm_realm_state(kvm) != REALM_STATE_NONE; ++} ++ ++static inline gpa_t kvm_gpa_from_fault(struct kvm *kvm, phys_addr_t ipa) + { +- if (static_branch_unlikely(&virtcca_cvm_is_available)) +- return kvm->arch.is_virtcca_cvm; ++ if (kvm_is_realm(kvm)) { ++ struct realm *realm = &kvm->arch.realm; ++ ++ return ipa & ~BIT(realm->ia_bits - 1); ++ } ++ return ipa; ++} ++ ++/* vcpu of virtCCA or CCA */ ++static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu) ++{ ++ if (static_branch_unlikely(&kvm_rme_is_available)) ++ return vcpu_has_feature(vcpu, KVM_ARM_VCPU_REC) || ++ (vcpu->arch.tec.run != 
NULL); + return false; + } + ++/* vcpu of CCA only */ ++static inline bool _vcpu_is_rec(struct kvm_vcpu *vcpu) ++{ ++ return vcpu_is_rec(vcpu) && (kvm_get_cvm_type() == ARMCCA_CVM); ++} ++ ++static inline bool kvm_arm_rec_finalized(struct kvm_vcpu *vcpu) ++{ ++ return vcpu->arch.rec->mpidr != INVALID_HWID; ++} ++ ++#ifdef CONFIG_HISI_VIRTCCA_HOST ++ + static inline enum virtcca_cvm_state virtcca_cvm_state(struct kvm *kvm) + { + struct virtcca_cvm *virtcca_cvm = kvm->arch.virtcca_cvm; +@@ -660,4 +709,5 @@ static inline enum virtcca_cvm_state virtcca_cvm_state(struct kvm *kvm) + return READ_ONCE(virtcca_cvm->state); + } + #endif ++ + #endif /* __ARM64_KVM_EMULATE_H__ */ +diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h +index 81898bb87c5e..c51ae33acc4b 100644 +--- a/arch/arm64/include/asm/kvm_host.h ++++ b/arch/arm64/include/asm/kvm_host.h +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -27,9 +28,8 @@ + #include + #include + #include +-#ifdef CONFIG_HISI_VIRTCCA_HOST ++#include + #include +-#endif + + #define __KVM_HAVE_ARCH_INTC_INITIALIZED + +@@ -41,7 +41,8 @@ + + #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS + +-#define KVM_VCPU_MAX_FEATURES 7 ++#define KVM_VCPU_MAX_FEATURES_KABI_BASELINE 7 ++#define KVM_VCPU_MAX_FEATURES 9 + #define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1) + + #define KVM_REQ_SLEEP \ +@@ -77,8 +78,8 @@ enum kvm_mode kvm_get_mode(void); + static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; }; + #endif + +-extern unsigned int __ro_after_init kvm_sve_max_vl; + int __init kvm_arm_init_sve(void); ++unsigned int kvm_sve_get_max_vl(struct kvm *kvm); + + u32 __attribute_const__ kvm_target_cpu(void); + int kvm_reset_vcpu(struct kvm_vcpu *vcpu); +@@ -250,8 +251,19 @@ struct kvm_arch { + #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 8 + unsigned long flags; + +- /* VM-wide vCPU feature set */ +- DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES); 
++ /* VM-wide vCPU feature set ++ * When calculating kabi CRC baseline by KVM_VCPU_MAX_FEATURES (old value is 7), ++ * the member variable here is ++ * unsigned long vcpu_features[(((7) + ((sizeof(long) * 8)) - 1) / ((sizeof(long) * 8)))] ++ * Now add ARM CCA vcpu feature, KVM_VCPU_MAX_FEATURES should increase to 10 ++ * the member variable here changes to ++ * unsigned long vcpu_features[(((10) + ((sizeof(long) * 8)) - 1) / ((sizeof(long) * 8)))] ++ * kabi CRC calculate by the expression, not the value of expression, causes kabi changes. ++ * Because the real size of vcpu_features[] is unchanged, we can use KABI_REPLACE ++ * to fix kabi conflict. ++ */ ++ KABI_REPLACE(DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES_KABI_BASELINE), ++ DECLARE_BITMAP(vcpu_features, KVM_VCPU_MAX_FEATURES)) + + /* + * VM-wide PMU filter, implemented as a bitmap and big enough for +@@ -291,13 +303,22 @@ struct kvm_arch { + u64 tlbi_dvmbm; + #endif + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- union { +- struct cvm cvm; +- struct virtcca_cvm *virtcca_cvm; +- }; +- bool is_virtcca_cvm; +-#endif ++ KABI_REPLACE(union { ++ struct cvm cvm; ++ struct virtcca_cvm *virtcca_cvm; ++ }, ++ union { ++ struct cvm cvm; ++ struct virtcca_cvm *virtcca_cvm; ++ struct realm realm; ++ }) ++ KABI_REPLACE(bool is_virtcca_cvm, ++ union { ++ bool is_virtcca_cvm; ++ bool is_realm; ++ }) ++ /* PMCR_EL0.N value for the guest */ ++ KABI_EXTEND(u8 pmcr_n) + }; + + struct kvm_vcpu_fault_info { +@@ -607,8 +628,17 @@ struct kvm_vcpu_arch { + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + +- /* feature flags */ +- DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); ++ /* feature flags ++ * When calculating kabi CRC baseline by KVM_VCPU_MAX_FEATURES (old value is 7), the member variable here is ++ * unsigned long features[(((7) + ((sizeof(long) * 8)) - 1) / ((sizeof(long) * 8)))] ++ * Now add ARM CCA vcpu feature, KVM_VCPU_MAX_FEATURES should increase to 10 ++ * the 
member variable here changes to ++ * unsigned long features[(((10) + ((sizeof(long) * 8)) - 1) / ((sizeof(long) * 8)))] ++ * kabi CRC calculate by the expression, not the value of expression, causes kabi changes. ++ * Because the real size of features[] is unchanged, we can use KABI_REPLACE to fix kabi conflict. ++ */ ++ KABI_REPLACE(DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES_KABI_BASELINE), ++ DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES)) + + /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ + u64 vsesr_el2; +@@ -638,9 +668,12 @@ struct kvm_vcpu_arch { + cpumask_var_t pre_sched_cpus; + #endif + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- struct virtcca_cvm_tec tec; +-#endif ++ KABI_REPLACE(struct virtcca_cvm_tec tec, ++ union { ++ struct virtcca_cvm_tec tec; ++ /* Realm meta data */ ++ struct realm_rec *rec; ++ }) + + #ifdef CONFIG_ARM64_HDBSS + /* HDBSS registers info */ +diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h +index 4a03e4801127..0c0ae56e8163 100644 +--- a/arch/arm64/include/asm/kvm_pgtable.h ++++ b/arch/arm64/include/asm/kvm_pgtable.h +@@ -44,6 +44,15 @@ typedef u64 kvm_pte_t; + + #define KVM_PHYS_INVALID (-1ULL) + ++#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) ++#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) ++ ++/* ++ * Used to indicate a pte for which a 'break-before-make' sequence is in ++ * progress. ++ */ ++#define KVM_INVALID_PTE_LOCKED BIT(10) ++ + static inline bool kvm_pte_valid(kvm_pte_t pte) + { + return pte & KVM_PTE_VALID; +diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h +new file mode 100644 +index 000000000000..568537f96da5 +--- /dev/null ++++ b/arch/arm64/include/asm/kvm_rme.h +@@ -0,0 +1,137 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) 2023 ARM Ltd. 
++ */ ++ ++#ifndef __ASM_KVM_RME_H ++#define __ASM_KVM_RME_H ++ ++#include <asm/rmi_smc.h> ++#include <uapi/linux/kvm.h> ++ ++/** ++ * enum realm_state - State of a Realm ++ */ ++enum realm_state { ++ /** ++ * @REALM_STATE_NONE: ++ * Realm has not yet been created. rmi_realm_create() may be ++ * called to create the realm. ++ */ ++ REALM_STATE_NONE, ++ /** ++ * @REALM_STATE_NEW: ++ * Realm is under construction, not eligible for execution. Pages ++ * may be populated with rmi_data_create(). ++ */ ++ REALM_STATE_NEW, ++ /** ++ * @REALM_STATE_ACTIVE: ++ * Realm has been created and is eligible for execution with ++ * rmi_rec_enter(). Pages may no longer be populated with ++ * rmi_data_create(). ++ */ ++ REALM_STATE_ACTIVE, ++ /** ++ * @REALM_STATE_DYING: ++ * Realm is in the process of being destroyed or has already been ++ * destroyed. ++ */ ++ REALM_STATE_DYING, ++ /** ++ * @REALM_STATE_DEAD: ++ * Realm has been destroyed. ++ */ ++ REALM_STATE_DEAD ++}; ++ ++/** ++ * struct realm - Additional per VM data for a Realm ++ * ++ * @state: The lifetime state machine for the realm ++ * @rd: Kernel mapping of the Realm Descriptor (RD) ++ * @params: Parameters for the RMI_REALM_CREATE command ++ * @num_aux: The number of auxiliary pages required by the RMM ++ * @vmid: VMID to be used by the RMM for the realm ++ * @ia_bits: Number of valid Input Address bits in the IPA ++ */ ++struct realm { ++ enum realm_state state; ++ ++ void *rd; ++ struct realm_params *params; ++ ++ unsigned long num_aux; ++ unsigned int vmid; ++ unsigned int ia_bits; ++}; ++ ++/** ++ * struct realm_rec - Additional per VCPU data for a Realm ++ * ++ * @mpidr: MPIDR (Multiprocessor Affinity Register) value to identify this VCPU ++ * @rec_page: Kernel VA of the RMM's private page for this REC ++ * @aux_pages: Additional pages private to the RMM for this REC ++ * @run: Kernel VA of the RmiRecRun structure shared with the RMM ++ */ ++struct realm_rec { ++ unsigned long mpidr; ++ void *rec_page; ++ /* ++ * REC_PARAMS_AUX_GRANULES is the
maximum number of granules that the ++ * RMM can require. By using that to size the array we know that it ++ * will be big enough as the page size is always at least as large as ++ * the granule size. In the case of a larger page size than 4k (or an ++ * RMM which requires fewer auxiliary granules), the array will be ++ * bigger than needed however the extra memory required is small and ++ * this keeps the code cleaner. ++ */ ++ struct page *aux_pages[REC_PARAMS_AUX_GRANULES]; ++ struct rec_run *run; ++}; ++ ++void _kvm_init_rme(void); ++u32 kvm_realm_ipa_limit(void); ++u32 _kvm_realm_vgic_nr_lr(void); ++u8 kvm_realm_max_pmu_counters(void); ++unsigned int kvm_realm_sve_max_vl(void); ++ ++u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val); ++ ++bool kvm_rme_supports_sve(void); ++ ++int _kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); ++int _kvm_init_realm_vm(struct kvm *kvm); ++void _kvm_destroy_realm(struct kvm *kvm); ++void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits); ++int _kvm_create_rec(struct kvm_vcpu *vcpu); ++void _kvm_destroy_rec(struct kvm_vcpu *vcpu); ++ ++int _kvm_rec_enter(struct kvm_vcpu *vcpu); ++int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_status); ++ ++void kvm_realm_unmap_range(struct kvm *kvm, ++ unsigned long ipa, ++ unsigned long size, ++ bool unmap_private); ++int realm_map_protected(struct realm *realm, ++ unsigned long base_ipa, ++ kvm_pfn_t pfn, ++ unsigned long size, ++ struct kvm_mmu_memory_cache *memcache); ++int realm_map_non_secure(struct realm *realm, ++ unsigned long ipa, ++ kvm_pfn_t pfn, ++ unsigned long size, ++ struct kvm_mmu_memory_cache *memcache); ++int _realm_psci_complete(struct kvm_vcpu *source, ++ struct kvm_vcpu *target, ++ unsigned long status); ++ ++static inline bool kvm_realm_is_private_address(struct realm *realm, ++ unsigned long addr) ++{ ++ return !(addr & BIT(realm->ia_bits - 1)); ++} ++ ++#endif /* __ASM_KVM_RME_H */ +diff --git 
a/arch/arm64/include/asm/kvm_tmi.h b/arch/arm64/include/asm/kvm_tmi.h +index 967a57ae932e..973f34ab47e9 100644 +--- a/arch/arm64/include/asm/kvm_tmi.h ++++ b/arch/arm64/include/asm/kvm_tmi.h +@@ -160,8 +160,8 @@ struct tmi_tec_exit { + }; + + struct tmi_tec_run { +- struct tmi_tec_entry tec_entry; +- struct tmi_tec_exit tec_exit; ++ struct tmi_tec_entry enter; ++ struct tmi_tec_exit exit; + }; + + #define TMI_FNUM_MIN_VALUE U(0x150) +diff --git a/arch/arm64/include/asm/kvm_tmm.h b/arch/arm64/include/asm/kvm_tmm.h +index f6e773c4aa13..484940589c7c 100644 +--- a/arch/arm64/include/asm/kvm_tmm.h ++++ b/arch/arm64/include/asm/kvm_tmm.h +@@ -6,24 +6,7 @@ + #define __ASM_KVM_TMM_H + + #include +- +-/* +- * There is a conflict with the internal iova of CVM, +- * so it is necessary to offset the msi iova. +- * According to qemu file(hw/arm/virt.c), 0x0a001000 - 0x0b000000 +- * iova is not being used, so it is used as the iova range for msi +- * mapping. +- */ +-#define CVM_MSI_ORIG_IOVA 0x8000000 +-#define CVM_MSI_MIN_IOVA 0x0a001000 +-#define CVM_MSI_MAX_IOVA 0x0b000000 +-#define CVM_MSI_IOVA_OFFSET 0x1000 +- +-#define CVM_RW_8_BIT 0x8 +-#define CVM_RW_16_BIT 0x10 +-#define CVM_RW_32_BIT 0x20 +-#define CVM_RW_64_BIT 0x40 +- ++#include + enum virtcca_cvm_state { + CVM_STATE_NONE = 1, + CVM_STATE_NEW, +@@ -101,15 +84,33 @@ struct virtcca_cvm { + struct virtcca_cvm_tec { + u64 tec; + bool tec_created; +- void *tec_run; ++ KABI_REPLACE(void *tec_run, struct tmi_tec_run *run) + }; + ++#ifdef CONFIG_HISI_VIRTCCA_HOST ++/* ++ * There is a conflict with the internal iova of CVM, ++ * so it is necessary to offset the msi iova. ++ * According to qemu file(hw/arm/virt.c), 0x0a001000 - 0x0b000000 ++ * iova is not being used, so it is used as the iova range for msi ++ * mapping. 
++ */ ++#define CVM_MSI_ORIG_IOVA 0x8000000 ++#define CVM_MSI_MIN_IOVA 0x0a001000 ++#define CVM_MSI_MAX_IOVA 0x0b000000 ++#define CVM_MSI_IOVA_OFFSET 0x1000 ++ ++#define CVM_RW_8_BIT 0x8 ++#define CVM_RW_16_BIT 0x10 ++#define CVM_RW_32_BIT 0x20 ++#define CVM_RW_64_BIT 0x40 ++ + struct cvm_ttt_addr { + struct list_head list; + u64 addr; + }; + +-int kvm_init_tmm(void); ++void kvm_init_tmm(void); + int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); + void kvm_destroy_cvm(struct kvm *kvm); + int kvm_finalize_vcpu_tec(struct kvm_vcpu *vcpu); +@@ -118,7 +119,7 @@ int kvm_tec_enter(struct kvm_vcpu *vcpu); + int handle_cvm_exit(struct kvm_vcpu *vcpu, int rec_run_status); + int kvm_arm_create_cvm(struct kvm *kvm); + void kvm_free_rd(struct kvm *kvm); +-int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target); ++int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status); + + void kvm_cvm_unmap_destroy_range(struct kvm *kvm); + int kvm_cvm_map_range(struct kvm *kvm); +@@ -156,5 +157,6 @@ static inline unsigned long cvm_ttt_level_mapsize(int level) + + return (1UL << CVM_TTT_LEVEL_SHIFT(level)); + } ++#endif + + #endif +diff --git a/arch/arm64/include/asm/rmi_cmds.h b/arch/arm64/include/asm/rmi_cmds.h +new file mode 100644 +index 000000000000..27cd2751f3bf +--- /dev/null ++++ b/arch/arm64/include/asm/rmi_cmds.h +@@ -0,0 +1,508 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) 2023 ARM Ltd. 
++ */ ++ ++#ifndef __ASM_RMI_CMDS_H ++#define __ASM_RMI_CMDS_H ++ ++#include <linux/arm-smccc.h> ++ ++#include <asm/rmi_smc.h> ++ ++struct rtt_entry { ++ unsigned long walk_level; ++ unsigned long desc; ++ int state; ++ int ripas; ++}; ++ ++/** ++ * rmi_data_create() - Create a data granule ++ * @rd: PA of the RD ++ * @data: PA of the target granule ++ * @ipa: IPA at which the granule will be mapped in the guest ++ * @src: PA of the source granule ++ * @flags: RMI_MEASURE_CONTENT if the contents should be measured ++ * ++ * Create a new data granule, copying contents from a non-secure granule. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_data_create(unsigned long rd, unsigned long data, ++ unsigned long ipa, unsigned long src, ++ unsigned long flags) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_DATA_CREATE, rd, data, ipa, src, ++ flags, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_data_create_unknown() - Create a data granule with unknown contents ++ * @rd: PA of the RD ++ * @data: PA of the target granule ++ * @ipa: IPA at which the granule will be mapped in the guest ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_data_create_unknown(unsigned long rd, ++ unsigned long data, ++ unsigned long ipa) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_DATA_CREATE_UNKNOWN, rd, data, ipa, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_data_destroy() - Destroy a data granule ++ * @rd: PA of the RD ++ * @ipa: IPA at which the granule is mapped in the guest ++ * @data_out: PA of the granule which was destroyed ++ * @top_out: Top IPA of non-live RTT entries ++ * ++ * Unmap a protected IPA from stage 2, transitioning it to DESTROYED. ++ * The IPA cannot be used by the guest unless it is transitioned to RAM again ++ * by the realm guest.
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_data_destroy(unsigned long rd, unsigned long ipa, ++ unsigned long *data_out, ++ unsigned long *top_out) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_DATA_DESTROY, rd, ipa, &res); ++ ++ if (data_out) ++ *data_out = res.a1; ++ if (top_out) ++ *top_out = res.a2; ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_features() - Read feature register ++ * @index: Feature register index ++ * @out: Feature register value is written to this pointer ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_features(unsigned long index, unsigned long *out) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_FEATURES, index, &res); ++ ++ if (out) ++ *out = res.a1; ++ return res.a0; ++} ++ ++/** ++ * rmi_granule_delegate() - Delegate a granule ++ * @phys: PA of the granule ++ * ++ * Delegate a granule for use by the realm world. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_granule_delegate(unsigned long phys) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_GRANULE_DELEGATE, phys, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_granule_undelegate() - Undelegate a granule ++ * @phys: PA of the granule ++ * ++ * Undelegate a granule to allow use by the normal world. Will fail if the ++ * granule is in use. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_granule_undelegate(unsigned long phys) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_GRANULE_UNDELEGATE, phys, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_psci_complete() - Complete pending PSCI command ++ * @calling_rec: PA of the calling REC ++ * @target_rec: PA of the target REC ++ * @status: Status of the PSCI request ++ * ++ * Completes a pending PSCI command which was called with an MPIDR argument, by ++ * providing the corresponding REC. 
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_psci_complete(unsigned long calling_rec, ++ unsigned long target_rec, ++ unsigned long status) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_PSCI_COMPLETE, calling_rec, target_rec, ++ status, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_realm_activate() - Activate a realm ++ * @rd: PA of the RD ++ * ++ * Mark a realm as Active signalling that creation is complete and allowing ++ * execution of the realm. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_realm_activate(unsigned long rd) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REALM_ACTIVATE, rd, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_realm_create() - Create a realm ++ * @rd: PA of the RD ++ * @params: PA of realm parameters ++ * ++ * Create a new realm using the given parameters. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_realm_create(unsigned long rd, unsigned long params) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REALM_CREATE, rd, params, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_realm_destroy() - Destroy a realm ++ * @rd: PA of the RD ++ * ++ * Destroys a realm; all objects belonging to the realm must be destroyed first. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_realm_destroy(unsigned long rd) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REALM_DESTROY, rd, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rec_aux_count() - Get number of auxiliary granules required ++ * @rd: PA of the RD ++ * @aux_count: Number of granules written to this pointer ++ * ++ * A REC may require extra auxiliary granules to be delegated for the RMM to ++ * store metadata (not visible to the normal world) in. This function provides ++ * the number of granules that are required.
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rec_aux_count(unsigned long rd, unsigned long *aux_count) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REC_AUX_COUNT, rd, &res); ++ ++ if (aux_count) ++ *aux_count = res.a1; ++ return res.a0; ++} ++ ++/** ++ * rmi_rec_create() - Create a REC ++ * @rd: PA of the RD ++ * @rec: PA of the target REC ++ * @params: PA of REC parameters ++ * ++ * Create a REC using the parameters specified in the struct rec_params pointed ++ * to by @params. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rec_create(unsigned long rd, unsigned long rec, ++ unsigned long params) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REC_CREATE, rd, rec, params, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rec_destroy() - Destroy a REC ++ * @rec: PA of the target REC ++ * ++ * Destroys a REC. The REC must not be running. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rec_destroy(unsigned long rec) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REC_DESTROY, rec, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rec_enter() - Enter a REC ++ * @rec: PA of the target REC ++ * @run_ptr: PA of RecRun structure ++ * ++ * Starts (or continues) execution within a REC. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rec_enter(unsigned long rec, unsigned long run_ptr) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_REC_ENTER, rec, run_ptr, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_create() - Creates an RTT ++ * @rd: PA of the RD ++ * @rtt: PA of the target RTT ++ * @ipa: Base of the IPA range described by the RTT ++ * @level: Depth of the RTT within the tree ++ * ++ * Creates an RTT (Realm Translation Table) at the specified level for the ++ * translation of the specified address within the realm. 
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_create(unsigned long rd, unsigned long rtt, ++ unsigned long ipa, long level) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_CREATE, rd, rtt, ipa, level, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_destroy() - Destroy an RTT ++ * @rd: PA of the RD ++ * @ipa: Base of the IPA range described by the RTT ++ * @level: Depth of the RTT within the tree ++ * @out_rtt: Pointer to write the PA of the RTT which was destroyed ++ * @out_top: Pointer to write the top IPA of non-live RTT entries ++ * ++ * Destroys an RTT. The RTT must be non-live, i.e. none of the entries in the ++ * table are in ASSIGNED or TABLE state. ++ * ++ * Return: RMI return code. ++ */ ++static inline int rmi_rtt_destroy(unsigned long rd, ++ unsigned long ipa, ++ long level, ++ unsigned long *out_rtt, ++ unsigned long *out_top) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_DESTROY, rd, ipa, level, &res); ++ ++ if (out_rtt) ++ *out_rtt = res.a1; ++ if (out_top) ++ *out_top = res.a2; ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_fold() - Fold an RTT ++ * @rd: PA of the RD ++ * @ipa: Base of the IPA range described by the RTT ++ * @level: Depth of the RTT within the tree ++ * @out_rtt: Pointer to write the PA of the RTT which was destroyed ++ * ++ * Folds an RTT. If all entries with the RTT are 'homogeneous' the RTT can be ++ * folded into the parent and the RTT destroyed. 
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_fold(unsigned long rd, unsigned long ipa, ++ long level, unsigned long *out_rtt) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_FOLD, rd, ipa, level, &res); ++ ++ if (out_rtt) ++ *out_rtt = res.a1; ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_init_ripas() - Set RIPAS for new realm ++ * @rd: PA of the RD ++ * @base: Base of target IPA region ++ * @top: Top of target IPA region ++ * @out_top: Top IPA of range whose RIPAS was modified ++ * ++ * Sets the RIPAS of a target IPA range to RAM, for a realm in the NEW state. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_init_ripas(unsigned long rd, unsigned long base, ++ unsigned long top, unsigned long *out_top) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_INIT_RIPAS, rd, base, top, &res); ++ ++ if (out_top) ++ *out_top = res.a1; ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_map_unprotected() - Map NS granules into a realm ++ * @rd: PA of the RD ++ * @ipa: Base IPA of the mapping ++ * @level: Depth within the RTT tree ++ * @desc: RTTE descriptor ++ * ++ * Create a mapping from an Unprotected IPA to a Non-secure PA. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_map_unprotected(unsigned long rd, ++ unsigned long ipa, ++ long level, ++ unsigned long desc) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_MAP_UNPROTECTED, rd, ipa, level, ++ desc, &res); ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_read_entry() - Read an RTTE ++ * @rd: PA of the RD ++ * @ipa: IPA for which to read the RTTE ++ * @level: RTT level at which to read the RTTE ++ * @rtt: Output structure describing the RTTE ++ * ++ * Reads a RTTE (Realm Translation Table Entry). 
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_read_entry(unsigned long rd, unsigned long ipa, ++ long level, struct rtt_entry *rtt) ++{ ++ struct arm_smccc_1_2_regs regs = { ++ SMC_RMI_RTT_READ_ENTRY, ++ rd, ipa, level ++ }; ++ ++ arm_smccc_1_2_smc(&regs, &regs); ++ ++ rtt->walk_level = regs.a1; ++ rtt->state = regs.a2 & 0xFF; ++ rtt->desc = regs.a3; ++ rtt->ripas = regs.a4 & 0xFF; ++ ++ return regs.a0; ++} ++ ++/** ++ * rmi_rtt_set_ripas() - Set RIPAS for a running realm ++ * @rd: PA of the RD ++ * @rec: PA of the REC making the request ++ * @base: Base of target IPA region ++ * @top: Top of target IPA region ++ * @out_top: Pointer to write top IPA of range whose RIPAS was modified ++ * ++ * Completes a request made by the realm to change the RIPAS of a target IPA ++ * range. ++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_set_ripas(unsigned long rd, unsigned long rec, ++ unsigned long base, unsigned long top, ++ unsigned long *out_top) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_SET_RIPAS, rd, rec, base, top, &res); ++ ++ if (out_top) ++ *out_top = res.a1; ++ ++ return res.a0; ++} ++ ++/** ++ * rmi_rtt_unmap_unprotected() - Remove a NS mapping ++ * @rd: PA of the RD ++ * @ipa: Base IPA of the mapping ++ * @level: Depth within the RTT tree ++ * @out_top: Pointer to write top IPA of non-live RTT entries ++ * ++ * Removes a mapping at an Unprotected IPA.
++ * ++ * Return: RMI return code ++ */ ++static inline int rmi_rtt_unmap_unprotected(unsigned long rd, ++ unsigned long ipa, ++ long level, ++ unsigned long *out_top) ++{ ++ struct arm_smccc_res res; ++ ++ arm_smccc_1_1_invoke(SMC_RMI_RTT_UNMAP_UNPROTECTED, rd, ipa, ++ level, &res); ++ ++ if (out_top) ++ *out_top = res.a1; ++ ++ return res.a0; ++} ++ ++#endif /* __ASM_RMI_CMDS_H */ +diff --git a/arch/arm64/include/asm/rmi_smc.h b/arch/arm64/include/asm/rmi_smc.h +new file mode 100644 +index 000000000000..7a93a3e0ac6e +--- /dev/null ++++ b/arch/arm64/include/asm/rmi_smc.h +@@ -0,0 +1,259 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) 2023-2024 ARM Ltd. ++ * ++ * The values and structures in this file are from the Realm Management Monitor ++ * specification (DEN0137) version 1.0-rel0: ++ * https://developer.arm.com/documentation/den0137/1-0rel0/ ++ */ ++ ++#ifndef __ASM_RMI_SMC_H ++#define __ASM_RMI_SMC_H ++ ++#include <linux/arm-smccc.h> ++ ++#define SMC_RMI_CALL(func) \ ++ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ++ ARM_SMCCC_SMC_64, \ ++ ARM_SMCCC_OWNER_STANDARD, \ ++ (func)) ++ ++#define SMC_RMI_VERSION SMC_RMI_CALL(0x0150) ++#define SMC_RMI_GRANULE_DELEGATE SMC_RMI_CALL(0x0151) ++#define SMC_RMI_GRANULE_UNDELEGATE SMC_RMI_CALL(0x0152) ++#define SMC_RMI_DATA_CREATE SMC_RMI_CALL(0x0153) ++#define SMC_RMI_DATA_CREATE_UNKNOWN SMC_RMI_CALL(0x0154) ++#define SMC_RMI_DATA_DESTROY SMC_RMI_CALL(0x0155) ++ ++#define SMC_RMI_REALM_ACTIVATE SMC_RMI_CALL(0x0157) ++#define SMC_RMI_REALM_CREATE SMC_RMI_CALL(0x0158) ++#define SMC_RMI_REALM_DESTROY SMC_RMI_CALL(0x0159) ++#define SMC_RMI_REC_CREATE SMC_RMI_CALL(0x015a) ++#define SMC_RMI_REC_DESTROY SMC_RMI_CALL(0x015b) ++#define SMC_RMI_REC_ENTER SMC_RMI_CALL(0x015c) ++#define SMC_RMI_RTT_CREATE SMC_RMI_CALL(0x015d) ++#define SMC_RMI_RTT_DESTROY SMC_RMI_CALL(0x015e) ++#define SMC_RMI_RTT_MAP_UNPROTECTED SMC_RMI_CALL(0x015f) ++ ++#define SMC_RMI_RTT_READ_ENTRY SMC_RMI_CALL(0x0161) ++#define SMC_RMI_RTT_UNMAP_UNPROTECTED
SMC_RMI_CALL(0x0162) ++ ++#define SMC_RMI_PSCI_COMPLETE SMC_RMI_CALL(0x0164) ++#define SMC_RMI_FEATURES SMC_RMI_CALL(0x0165) ++#define SMC_RMI_RTT_FOLD SMC_RMI_CALL(0x0166) ++#define SMC_RMI_REC_AUX_COUNT SMC_RMI_CALL(0x0167) ++#define SMC_RMI_RTT_INIT_RIPAS SMC_RMI_CALL(0x0168) ++#define SMC_RMI_RTT_SET_RIPAS SMC_RMI_CALL(0x0169) ++ ++#define RMI_ABI_MAJOR_VERSION 1 ++#define RMI_ABI_MINOR_VERSION 0 ++ ++#define RMI_ABI_VERSION_GET_MAJOR(version) ((version) >> 16) ++#define RMI_ABI_VERSION_GET_MINOR(version) ((version) & 0xFFFF) ++#define RMI_ABI_VERSION(major, minor) (((major) << 16) | (minor)) ++ ++#define RMI_UNASSIGNED 0 ++#define RMI_ASSIGNED 1 ++#define RMI_TABLE 2 ++ ++#define RMI_RETURN_STATUS(ret) ((ret) & 0xFF) ++#define RMI_RETURN_INDEX(ret) (((ret) >> 8) & 0xFF) ++ ++#define RMI_SUCCESS 0 ++#define RMI_ERROR_INPUT 1 ++#define RMI_ERROR_REALM 2 ++#define RMI_ERROR_REC 3 ++#define RMI_ERROR_RTT 4 ++ ++enum rmi_ripas { ++ RMI_EMPTY = 0, ++ RMI_RAM = 1, ++ RMI_DESTROYED = 2, ++}; ++ ++#define RMI_NO_MEASURE_CONTENT 0 ++#define RMI_MEASURE_CONTENT 1 ++ ++#define RMI_FEATURE_REGISTER_0_S2SZ GENMASK(7, 0) ++#define RMI_FEATURE_REGISTER_0_LPA2 BIT(8) ++#define RMI_FEATURE_REGISTER_0_SVE_EN BIT(9) ++#define RMI_FEATURE_REGISTER_0_SVE_VL GENMASK(13, 10) ++#define RMI_FEATURE_REGISTER_0_NUM_BPS GENMASK(19, 14) ++#define RMI_FEATURE_REGISTER_0_NUM_WPS GENMASK(25, 20) ++#define RMI_FEATURE_REGISTER_0_PMU_EN BIT(26) ++#define RMI_FEATURE_REGISTER_0_PMU_NUM_CTRS GENMASK(31, 27) ++#define RMI_FEATURE_REGISTER_0_HASH_SHA_256 BIT(32) ++#define RMI_FEATURE_REGISTER_0_HASH_SHA_512 BIT(33) ++#define RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS GENMASK(37, 34) ++#define RMI_FEATURE_REGISTER_0_MAX_RECS_ORDER GENMASK(41, 38) ++#define RMI_FEATURE_REGISTER_0_Reserved GENMASK(63, 42) ++ ++#define RMI_REALM_PARAM_FLAG_LPA2 BIT(0) ++#define RMI_REALM_PARAM_FLAG_SVE BIT(1) ++#define RMI_REALM_PARAM_FLAG_PMU BIT(2) ++ ++/* ++ * Note many of these fields are smaller than u64 but all fields 
have u64 ++ * alignment, so use u64 to ensure correct alignment. ++ */ ++struct realm_params { ++ union { /* 0x0 */ ++ struct { ++ u64 flags; ++ u64 s2sz; ++ u64 sve_vl; ++ u64 num_bps; ++ u64 num_wps; ++ u64 pmu_num_ctrs; ++ u64 hash_algo; ++ }; ++ u8 padding0[0x400]; ++ }; ++ union { /* 0x400 */ ++ u8 rpv[64]; ++ u8 padding1[0x400]; ++ }; ++ union { /* 0x800 */ ++ struct { ++ u64 vmid; ++ u64 rtt_base; ++ s64 rtt_level_start; ++ u64 rtt_num_start; ++ }; ++ u8 padding2[0x800]; ++ }; ++}; ++ ++/* ++ * The number of GPRs (starting from X0) that are ++ * configured by the host when a REC is created. ++ */ ++#define REC_CREATE_NR_GPRS 8 ++ ++#define REC_PARAMS_FLAG_RUNNABLE BIT_ULL(0) ++ ++#define REC_PARAMS_AUX_GRANULES 16 ++ ++struct rec_params { ++ union { /* 0x0 */ ++ u64 flags; ++ u8 padding0[0x100]; ++ }; ++ union { /* 0x100 */ ++ u64 mpidr; ++ u8 padding1[0x100]; ++ }; ++ union { /* 0x200 */ ++ u64 pc; ++ u8 padding2[0x100]; ++ }; ++ union { /* 0x300 */ ++ u64 gprs[REC_CREATE_NR_GPRS]; ++ u8 padding3[0x500]; ++ }; ++ union { /* 0x800 */ ++ struct { ++ u64 num_rec_aux; ++ u64 aux[REC_PARAMS_AUX_GRANULES]; ++ }; ++ u8 padding4[0x800]; ++ }; ++}; ++ ++#define REC_ENTER_FLAG_EMULATED_MMIO BIT(0) ++#define REC_ENTER_FLAG_INJECT_SEA BIT(1) ++#define REC_ENTER_FLAG_TRAP_WFI BIT(2) ++#define REC_ENTER_FLAG_TRAP_WFE BIT(3) ++#define REC_ENTER_FLAG_RIPAS_RESPONSE BIT(4) ++ ++#define REC_RUN_GPRS 31 ++#define REC_MAX_GIC_NUM_LRS 16 ++ ++struct rec_enter { ++ union { /* 0x000 */ ++ u64 flags; ++ u8 padding0[0x200]; ++ }; ++ union { /* 0x200 */ ++ u64 gprs[REC_RUN_GPRS]; ++ u8 padding1[0x100]; ++ }; ++ union { /* 0x300 */ ++ struct { ++ u64 gicv3_hcr; ++ u64 gicv3_lrs[REC_MAX_GIC_NUM_LRS]; ++ }; ++ u8 padding2[0x100]; ++ }; ++ u8 padding3[0x400]; ++}; ++ ++#define RMI_EXIT_SYNC 0x00 ++#define RMI_EXIT_IRQ 0x01 ++#define RMI_EXIT_FIQ 0x02 ++#define RMI_EXIT_PSCI 0x03 ++#define RMI_EXIT_RIPAS_CHANGE 0x04 ++#define RMI_EXIT_HOST_CALL 0x05 ++#define RMI_EXIT_SERROR 0x06 ++ 
++struct rec_exit { ++ union { /* 0x000 */ ++ u8 exit_reason; ++ u8 padding0[0x100]; ++ }; ++ union { /* 0x100 */ ++ struct { ++ u64 esr; ++ u64 far; ++ u64 hpfar; ++ }; ++ u8 padding1[0x100]; ++ }; ++ union { /* 0x200 */ ++ u64 gprs[REC_RUN_GPRS]; ++ u8 padding2[0x100]; ++ }; ++ union { /* 0x300 */ ++ struct { ++ u64 gicv3_hcr; ++ u64 gicv3_lrs[REC_MAX_GIC_NUM_LRS]; ++ u64 gicv3_misr; ++ u64 gicv3_vmcr; ++ }; ++ u8 padding3[0x100]; ++ }; ++ union { /* 0x400 */ ++ struct { ++ u64 cntp_ctl; ++ u64 cntp_cval; ++ u64 cntv_ctl; ++ u64 cntv_cval; ++ }; ++ u8 padding4[0x100]; ++ }; ++ union { /* 0x500 */ ++ struct { ++ u64 ripas_base; ++ u64 ripas_top; ++ u64 ripas_value; ++ }; ++ u8 padding5[0x100]; ++ }; ++ union { /* 0x600 */ ++ u16 imm; ++ u8 padding6[0x100]; ++ }; ++ union { /* 0x700 */ ++ struct { ++ u8 pmu_ovf_status; ++ }; ++ u8 padding7[0x100]; ++ }; ++}; ++ ++struct rec_run { ++ struct rec_enter enter; ++ struct rec_exit exit; ++}; ++ ++#endif /* __ASM_RMI_SMC_H */ +diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h +index 261d6e9df2e1..12cf36c38189 100644 +--- a/arch/arm64/include/asm/virt.h ++++ b/arch/arm64/include/asm/virt.h +@@ -81,6 +81,7 @@ void __hyp_reset_vectors(void); + bool is_kvm_arm_initialised(void); + + DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); ++DECLARE_STATIC_KEY_FALSE(kvm_rme_is_available); + + /* Reports the availability of HYP mode */ + static inline bool is_hyp_mode_available(void) +diff --git a/arch/arm64/include/asm/virtcca_cvm_host.h b/arch/arm64/include/asm/virtcca_cvm_host.h +index c379ad17cffe..3d55928874ad 100644 +--- a/arch/arm64/include/asm/virtcca_cvm_host.h ++++ b/arch/arm64/include/asm/virtcca_cvm_host.h +@@ -4,6 +4,7 @@ + */ + #ifndef __VIRTCCA_CVM_HOST_H + #define __VIRTCCA_CVM_HOST_H ++#include + + #ifdef CONFIG_HISI_VIRTCCA_HOST + +@@ -11,6 +12,7 @@ + #define UEFI_SIZE 0x8000000 + + bool is_virtcca_cvm_enable(void); ++void set_cca_cvm_type(int type); + + #else + +@@ -19,5 +21,7 @@ 
static inline bool is_virtcca_cvm_enable(void) + return false; + } + +-#endif /* CONFIG_HISI_VIRTCCA_GUEST */ +-#endif /* __VIRTCCA_CVM_GUEST_H */ ++static inline void set_cca_cvm_type(int type) {} ++ ++#endif /* CONFIG_HISI_VIRTCCA_HOST */ ++#endif /* __VIRTCCA_CVM_HOST_H */ +diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h +index 196231712b10..47eea5041899 100644 +--- a/arch/arm64/include/uapi/asm/kvm.h ++++ b/arch/arm64/include/uapi/asm/kvm.h +@@ -110,7 +110,7 @@ struct kvm_regs { + #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ + #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ + #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +-#define KVM_ARM_VCPU_TEC 8 /* VCPU TEC state as part of cvm */ ++#define KVM_ARM_VCPU_REC 8 /* VCPU REC state as part of Realm */ + + struct kvm_vcpu_init { + __u32 target; +@@ -424,6 +424,54 @@ enum { + #define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256 0 + #define KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512 1 + ++/* KVM_CAP_ARM_RME on VM fd */ ++#define KVM_CAP_ARM_RME_CONFIG_REALM 0 ++#define KVM_CAP_ARM_RME_CREATE_REALM 1 ++#define KVM_CAP_ARM_RME_INIT_RIPAS_REALM 2 ++#define KVM_CAP_ARM_RME_POPULATE_REALM 3 ++#define KVM_CAP_ARM_RME_ACTIVATE_REALM 4 ++ ++/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ ++#define ARM_RME_CONFIG_RPV 0 ++#define ARM_RME_CONFIG_HASH_ALGO 1 ++ ++#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256 0 ++#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512 1 ++ ++#define ARM_RME_CONFIG_RPV_SIZE 64 ++ ++struct arm_rme_config { ++ __u32 cfg; ++ union { ++ /* cfg == ARM_RME_CONFIG_RPV */ ++ struct { ++ __u8 rpv[ARM_RME_CONFIG_RPV_SIZE]; ++ }; ++ ++ /* cfg == ARM_RME_CONFIG_HASH_ALGO */ ++ struct { ++ __u32 hash_algo; ++ }; ++ ++ /* Fix the size of the union */ ++ __u8 reserved[256]; ++ }; ++}; ++ ++#define KVM_ARM_RME_POPULATE_FLAGS_MEASURE (1 << 0) ++struct arm_rme_populate_realm { ++ 
__u64 base; ++ __u64 size; ++ __u32 flags; ++ __u32 reserved[3]; ++}; ++ ++struct arm_rme_init_ripas { ++ __u64 base; ++ __u64 size; ++ __u64 reserved[2]; ++}; ++ + /* Device Control API on vcpu fd */ + #define KVM_ARM_VCPU_PMU_V3_CTRL 0 + #define KVM_ARM_VCPU_PMU_V3_IRQ 0 +diff --git a/arch/arm64/kernel/virtcca_cvm_host.c b/arch/arm64/kernel/virtcca_cvm_host.c +index 4522782b1342..c70c90542427 100644 +--- a/arch/arm64/kernel/virtcca_cvm_host.c ++++ b/arch/arm64/kernel/virtcca_cvm_host.c +@@ -28,8 +28,10 @@ static int __init setup_virtcca_cvm_host(char *str) + if (ret) { + pr_warn("Unable to parse cvm_guest.\n"); + } else { +- if (val) ++ if (val) { + static_branch_enable(&virtcca_cvm_is_enable); ++ set_cca_cvm_type(VIRTCCA_CVM); ++ } + } + return ret; + } +diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile +index eadf41417ffa..f48a34d5b0bc 100644 +--- a/arch/arm64/kvm/Makefile ++++ b/arch/arm64/kvm/Makefile +@@ -20,7 +20,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o pvsched.o \ + vgic/vgic-v3.o vgic/vgic-v4.o \ + vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ + vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ +- vgic/vgic-its.o vgic/vgic-debug.o ++ vgic/vgic-its.o vgic/vgic-debug.o \ ++ rme.o rme-exit.o cca_base.o + + kvm-$(CONFIG_VIRT_PLAT_DEV) += vgic/shadow_dev.o + kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o +diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c +index 27032290094d..7d7276712c08 100644 +--- a/arch/arm64/kvm/arch_timer.c ++++ b/arch/arm64/kvm/arch_timer.c +@@ -243,10 +243,13 @@ static inline void cvm_vcpu_put_timer_callback(struct kvm_vcpu *vcpu) + + static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) + { +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (kvm_is_virtcca_cvm(ctxt->vcpu->kvm)) ++ struct kvm_vcpu *vcpu = ctxt->vcpu; ++ ++ if (kvm_is_realm(vcpu->kvm)) { ++ WARN_ON(offset); + return; +-#endif ++ } ++ + if (!ctxt->offset.vm_offset) { + WARN(offset, "timer %ld\n", 
arch_timer_ctx_index(ctxt)); + return; +@@ -545,6 +548,21 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + } + } + ++void kvm_realm_timers_update(struct kvm_vcpu *vcpu) ++{ ++ struct arch_timer_cpu *arch_timer = &vcpu->arch.timer_cpu; ++ int i; ++ ++ for (i = 0; i < NR_KVM_EL0_TIMERS; i++) { ++ struct arch_timer_context *timer = &arch_timer->timers[i]; ++ bool status = timer_get_ctl(timer) & ARCH_TIMER_CTRL_IT_STAT; ++ bool level = kvm_timer_irq_can_fire(timer) && status; ++ ++ if (level != timer->irq.level) ++ kvm_timer_update_irq(vcpu, level, timer); ++ } ++} ++ + /* Only called for a fully emulated timer */ + static void timer_emulate(struct arch_timer_context *ctx) + { +@@ -960,6 +978,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) + if (unlikely(!timer->enabled)) + return; + ++ kvm_timer_unblocking(vcpu); ++ + get_timer_map(vcpu, &map); + + #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS +@@ -988,7 +1008,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) + if (static_branch_likely(&has_gic_active_state) && map.direct_ptimer) + kvm_timer_vcpu_load_gic(map.direct_ptimer); + #endif +- kvm_timer_unblocking(vcpu); + + timer_restore_state(map.direct_vtimer); + +@@ -1201,7 +1220,9 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) + + ctxt->vcpu = vcpu; + +- if (timerid == TIMER_VTIMER) ++ if (kvm_is_realm(vcpu->kvm)) ++ ctxt->offset.vm_offset = NULL; ++ else if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; +@@ -1224,13 +1245,19 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) + { + struct arch_timer_cpu *timer = vcpu_timer(vcpu); ++ u64 cntvoff; + + for (int i = 0; i < NR_KVM_TIMERS; i++) + timer_context_init(vcpu, i); + ++ if (kvm_is_realm(vcpu->kvm)) ++ cntvoff = 0; ++ else ++ cntvoff = kvm_phys_timer_read(); ++ + /* Synchronize offsets across timers 
of a VM if not already provided */ + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { +- timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); ++ timer_set_offset(vcpu_vtimer(vcpu), cntvoff); + timer_set_offset(vcpu_ptimer(vcpu), 0); + } + +@@ -1856,6 +1883,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) + return 0; + #endif + ++ /* ++ * We don't use mapped IRQs for Realms because the RMI doesn't allow ++ * us setting the LR.HW bit in the VGIC. ++ */ ++ if (vcpu_is_rec(vcpu)) ++ return 0; ++ + get_timer_map(vcpu, &map); + + #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS +@@ -1994,6 +2028,9 @@ int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + if (offset->reserved) + return -EINVAL; + ++ if (kvm_is_realm(kvm)) ++ return -EINVAL; ++ + mutex_lock(&kvm->lock); + + if (lock_all_vcpus(kvm)) { +diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c +index 3100a87944d5..4a9cb1d8070a 100644 +--- a/arch/arm64/kvm/arm.c ++++ b/arch/arm64/kvm/arm.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -39,11 +40,9 @@ + #include + #include + #include ++#include + #include + #include +-#include +-#include +- + #include + #include + #include +@@ -52,6 +51,8 @@ static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; + + #include "hisilicon/hisi_virt.h" + ++DEFINE_STATIC_KEY_FALSE(kvm_rme_is_available); ++ + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); + + DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); +@@ -250,13 +251,6 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + } + mutex_unlock(&kvm->slots_lock); + break; +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- case KVM_CAP_ARM_TMM: +- r = 0; +- if (static_branch_unlikely(&virtcca_cvm_is_available)) +- r = kvm_cvm_enable_cap(kvm, cap); +- break; +-#endif + #ifdef CONFIG_ARM64_HDBSS + case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: + r = kvm_cap_arm_enable_hdbss(kvm, cap); +@@ -267,6 +261,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + r = 
kvm_hisi_ipiv_enable_cap(kvm, cap); + break; + #endif ++ case KVM_CAP_ARM_RME: ++ mutex_lock(&kvm->lock); ++ r = kvm_realm_enable_cap(kvm, cap); ++ mutex_unlock(&kvm->lock); ++ break; + default: + r = -EINVAL; + break; +@@ -288,14 +287,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + { + int ret; + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (kvm_arm_cvm_type(type)) { +- ret = kvm_enable_virtcca_cvm(kvm); +- if (ret) +- return ret; +- } +-#endif +- + ret = kvm_sched_affinity_vm_init(kvm); + if (ret) + return ret; +@@ -310,6 +301,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + mutex_unlock(&kvm->lock); + #endif + ++ if (type & ~(KVM_VM_TYPE_ARM_MASK | KVM_VM_TYPE_ARM_IPA_SIZE_MASK)) ++ return -EINVAL; ++ ++ switch (type & KVM_VM_TYPE_ARM_MASK) { ++ case KVM_VM_TYPE_ARM_NORMAL: ++ break; ++ case KVM_VM_TYPE_ARM_REALM: ++ if (!static_branch_unlikely(&kvm_rme_is_available)) ++ return -EPERM; ++ kvm->arch.is_realm = true; ++ break; ++ default: ++ return -EINVAL; ++ } ++ + ret = kvm_share_hyp(kvm, kvm + 1); + if (ret) + return ret; +@@ -342,20 +348,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) + + bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES); + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (kvm_arm_cvm_type(type)) { +- ret = kvm_init_cvm_vm(kvm); ++ /* Initialise the realm bits after the generic bits are enabled */ ++ if (kvm_is_realm(kvm)) { ++ ret = kvm_init_realm_vm(kvm); + if (ret) +- goto out_free_stage2_pgd; ++ goto err_free_cpumask; + } +-#endif + + return 0; + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +-out_free_stage2_pgd: +- kvm_free_stage2_pgd(&kvm->arch.mmu); +-#endif + err_free_cpumask: + free_cpumask_var(kvm->arch.supported_cpus); + err_unshare_kvm: +@@ -390,10 +391,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) + kvm_unshare_hyp(kvm, kvm + 1); + + kvm_arm_teardown_hypercalls(kvm); +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (kvm_is_virtcca_cvm(kvm)) +- kvm_destroy_cvm(kvm); +-#endif ++ kvm_destroy_realm(kvm); + 
} + + #ifdef CONFIG_ARM64_HISI_IPIV +@@ -415,21 +413,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + case KVM_CAP_ONE_REG: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: +- case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: + case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: + case KVM_CAP_ARM_NISV_TO_USER: + case KVM_CAP_ARM_INJECT_EXT_DABT: +- case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_PTP_KVM: + case KVM_CAP_ARM_SYSTEM_SUSPEND: + case KVM_CAP_IRQFD_RESAMPLE: +- case KVM_CAP_COUNTER_OFFSET: + r = 1; + break; ++ case KVM_CAP_COUNTER_OFFSET: ++ case KVM_CAP_READONLY_MEM: ++ case KVM_CAP_SET_GUEST_DEBUG: ++ r = !kvm_is_realm(kvm); ++ break; + case KVM_CAP_SET_GUEST_DEBUG2: + return KVM_GUESTDBG_VALID_MASK; + case KVM_CAP_ARM_SET_DEVICE_ADDR: +@@ -469,21 +469,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + r = system_supports_mte(); + break; + case KVM_CAP_STEAL_TIME: +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (kvm && kvm_is_virtcca_cvm(kvm)) ++ if (kvm_is_realm(kvm)) + r = 0; + else +-#endif + r = kvm_arm_pvtime_supported(); + break; + case KVM_CAP_ARM_EL1_32BIT: + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); + break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: +- r = get_num_brps(); ++ r = kvm_is_realm(kvm) ? 0 : get_num_brps(); + break; + case KVM_CAP_GUEST_DEBUG_HW_WPS: +- r = get_num_wrps(); ++ r = kvm_is_realm(kvm) ? 
0 : get_num_wrps(); + break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_arm_support_pmu_v3(); +@@ -495,7 +493,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + r = get_kvm_ipa_limit(); + break; + case KVM_CAP_ARM_SVE: +- r = system_supports_sve(); ++ if (kvm_is_realm(kvm)) ++ r = kvm_rme_supports_sve(); ++ else ++ r = system_supports_sve(); + break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: +@@ -515,15 +516,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + r = sdev_enable; + break; + #endif +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- case KVM_CAP_ARM_TMM: +- if (!is_armv8_4_sel2_present()) { +- r = -ENXIO; +- break; +- } +- r = static_key_enabled(&virtcca_cvm_is_available); +- break; +-#endif + #ifdef CONFIG_ARM64_HDBSS + case KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: + r = system_supports_hdbss(); +@@ -537,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + r = 0; + break; + #endif ++ case KVM_CAP_ARM_RME: ++ r = static_key_enabled(&kvm_rme_is_available); ++ break; + default: + r = 0; + } +@@ -571,6 +566,18 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) + return 0; + } + ++int kvm_arch_rec_init(struct kvm_vcpu_arch *vcpu_arch) ++{ ++ struct realm_rec *rec; ++ ++ rec = kzalloc(sizeof(struct realm_rec), GFP_KERNEL_ACCOUNT); ++ if (!rec) ++ return -ENOMEM; ++ rec->mpidr = INVALID_HWID; ++ vcpu_arch->rec = rec; ++ return 0; ++} ++ + int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) + { + int err; +@@ -610,11 +617,26 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) + + err = kvm_sched_affinity_vcpu_init(vcpu); + if (err) +- return err; ++ goto vgic_vcpu_destroy; + + err = kvm_share_hyp(vcpu, vcpu + 1); + if (err) +- kvm_vgic_vcpu_destroy(vcpu); ++ goto sched_affinity_vcpu_destroy; ++ ++ err = kvm_arch_rec_init(&vcpu->arch); ++ if (err) ++ goto unshare_hyp; ++ ++ return err; ++ ++unshare_hyp: ++ kvm_unshare_hyp(vcpu, vcpu + 1); ++ ++sched_affinity_vcpu_destroy: ++ 
kvm_sched_affinity_vcpu_destroy(vcpu); ++ ++vgic_vcpu_destroy: ++ kvm_vgic_vcpu_destroy(vcpu); + + return err; + } +@@ -684,27 +706,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + + vcpu->cpu = cpu; + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) { +- if (single_task_running()) +- vcpu_clear_wfx_traps(vcpu); +- else +- vcpu_set_wfx_traps(vcpu); +- } +-#endif + kvm_vgic_load(vcpu); + kvm_timer_vcpu_load(vcpu); +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) { +- if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) +- kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); +- return; +- } +-#endif +- if (has_vhe()) +- kvm_vcpu_load_sysregs_vhe(vcpu); +- kvm_arch_vcpu_load_fp(vcpu); +- kvm_vcpu_pmu_restore_guest(vcpu); + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + +@@ -720,6 +723,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + vcpu_ptrauth_disable(vcpu); + kvm_arch_vcpu_load_debug_state_flags(vcpu); + ++ /* No additional state needs to be loaded on Realmed VMs */ ++ if (vcpu_is_rec(vcpu)) ++ return; ++ ++ if (has_vhe()) ++ kvm_vcpu_load_sysregs_vhe(vcpu); ++ kvm_arch_vcpu_load_fp(vcpu); ++ kvm_vcpu_pmu_restore_guest(vcpu); ++ + if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) + vcpu_set_on_unsupported_cpu(vcpu); + +@@ -731,23 +743,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) + { +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) { +- kvm_cvm_vcpu_put(vcpu); ++ kvm_timer_vcpu_put(vcpu); ++ kvm_vgic_put(vcpu); ++ ++ vcpu->cpu = -1; ++ ++ if (vcpu_is_rec(vcpu)) + return; +- } +-#endif ++ + kvm_arch_vcpu_put_debug_state_flags(vcpu); + kvm_arch_vcpu_put_fp(vcpu); + if (has_vhe()) + kvm_vcpu_put_sysregs_vhe(vcpu); +- kvm_timer_vcpu_put(vcpu); +- kvm_vgic_put(vcpu); ++ + kvm_vcpu_pmu_restore_host(vcpu); + kvm_arm_vmid_clear_active(); + + vcpu_clear_on_unsupported_cpu(vcpu); +- vcpu->cpu = -1; + + 
kvm_tlbi_dvmbm_vcpu_put(vcpu); + +@@ -914,6 +926,11 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) + if (kvm_vm_is_protected(kvm)) + kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + ++ if (!irqchip_in_kernel(kvm) && kvm_is_realm(vcpu->kvm)) { ++ /* Userspace irqchip not yet supported with Realms */ ++ return -EOPNOTSUPP; ++ } ++ + mutex_lock(&kvm->arch.config_lock); + set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); + mutex_unlock(&kvm->arch.config_lock); +@@ -1166,18 +1183,6 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) + xfer_to_guest_mode_work_pending(); + } + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +-static inline void update_pmu_phys_irq(struct kvm_vcpu *vcpu, bool *pmu_stopped) +-{ +- struct kvm_pmu *pmu = &vcpu->arch.pmu; +- +- if (pmu->irq_level) { +- *pmu_stopped = true; +- arm_pmu_set_phys_irq(false); +- } +-} +-#endif +- + /* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. +@@ -1230,9 +1235,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) + run->exit_reason = KVM_EXIT_UNKNOWN; + run->flags = 0; + while (ret > 0) { +-#ifdef CONFIG_HISI_VIRTCCA_HOST + bool pmu_stopped = false; +-#endif ++ + /* + * Check conditions before entering the guest + */ +@@ -1260,10 +1264,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) + kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid); + + kvm_pmu_flush_hwstate(vcpu); +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- update_pmu_phys_irq(vcpu, &pmu_stopped); +-#endif ++ ++ if (vcpu_is_rec(vcpu) && kvm_pmu_get_irq_level(vcpu)) { ++ pmu_stopped = true; ++ arm_pmu_set_phys_irq(false); ++ } + + local_irq_disable(); + +@@ -1302,11 +1307,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) + trace_kvm_entry(*vcpu_pc(vcpu)); + guest_timing_enter_irqoff(); + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- ret = kvm_tec_enter(vcpu); ++ if (vcpu_is_rec(vcpu)) ++ ret = kvm_rec_enter(vcpu); + else +-#endif 
+ ret = kvm_arm_vcpu_enter_exit(vcpu); + + vcpu->mode = OUTSIDE_GUEST_MODE; +@@ -1361,23 +1364,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) + + local_irq_enable(); + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (!vcpu_is_tec(vcpu)) { +-#endif +- trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); ++ /* Exit types that need handling before we can be preempted */ ++ if (!vcpu_is_rec(vcpu)) { ++ trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), ++ *vcpu_pc(vcpu)); + +- /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, ret); +- +-#ifdef CONFIG_HISI_VIRTCCA_HOST + } +-#endif ++ + preempt_enable(); + +-#ifdef CONFIG_HISI_VIRTCCA_HOST + if (pmu_stopped) + arm_pmu_set_phys_irq(true); +-#endif ++ + /* + * The ARMv8 architecture doesn't give the hypervisor + * a mechanism to prevent a guest from dropping to AArch32 EL0 +@@ -1397,11 +1396,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) + ret = ARM_EXCEPTION_IL; + } + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- ret = handle_cvm_exit(vcpu, ret); ++ if (vcpu_is_rec(vcpu)) ++ ret = handle_rec_exit(vcpu, ret); + else +-#endif + ret = handle_exit(vcpu, ret); + #ifdef CONFIG_ARCH_VCPU_STAT + update_vcpu_stat_time(&vcpu->stat); +@@ -1526,6 +1523,19 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + return -EINVAL; + } + ++static unsigned long system_supported_vcpu_features(struct kvm *kvm) ++{ ++ unsigned long features = KVM_VCPU_VALID_FEATURES; ++ ++ if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1)) ++ clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features); ++ ++ if (!kvm_is_realm(kvm)) ++ clear_bit(KVM_ARM_VCPU_REC, &features); ++ ++ return features; ++} ++ + static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) + { +@@ -1540,12 +1550,12 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, + return -ENOENT; + } + ++ if (features & 
~system_supported_vcpu_features(vcpu->kvm)) ++ return -EINVAL; ++ + if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) + return 0; + +- if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) +- return -EINVAL; +- + /* MTE is incompatible with AArch32 */ + if (kvm_has_mte(vcpu->kvm)) + return -EINVAL; +@@ -1564,6 +1574,10 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, + } + #endif + ++ /* RME is incompatible with AArch32 */ ++ if (test_bit(KVM_ARM_VCPU_REC, &features)) ++ return -EINVAL; ++ + return 0; + } + +@@ -1751,6 +1765,22 @@ static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + return __kvm_arm_vcpu_set_events(vcpu, events); + } + ++static int kvm_arm_vcpu_rmm_psci_complete(struct kvm_vcpu *vcpu, ++ struct kvm_arm_rmm_psci_complete *arg) ++{ ++ struct kvm_vcpu *target = kvm_mpidr_to_vcpu(vcpu->kvm, arg->target_mpidr); ++ ++ if (!target) ++ return -EINVAL; ++ ++ /* ++ * RMM v1.0 only supports PSCI_RET_SUCCESS or PSCI_RET_DENIED ++ * for the status. But, let us leave it to the RMM to filter ++ * for making this future proof. 
++ */ ++ return realm_psci_complete(vcpu, target, arg->psci_status); ++} ++ + long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) + { +@@ -1805,10 +1835,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, + if (unlikely(!kvm_vcpu_initialized(vcpu))) + break; + +- r = -EPERM; +- if (!kvm_arm_vcpu_is_finalized(vcpu)) +- break; +- + r = -EFAULT; + if (copy_from_user(&reg_list, user_list, sizeof(reg_list))) + break; +@@ -1873,6 +1899,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, + + return kvm_arm_vcpu_finalize(vcpu, what); + } ++ case KVM_ARM_VCPU_RMM_PSCI_COMPLETE: { ++ struct kvm_arm_rmm_psci_complete req; ++ ++ if (!vcpu_is_rec(vcpu)) ++ return -EPERM; ++ if (copy_from_user(&req, argp, sizeof(req))) ++ return -EFAULT; ++ return kvm_arm_vcpu_rmm_psci_complete(vcpu, &req); ++ } + default: + r = -EINVAL; + } +@@ -2828,13 +2863,8 @@ static __init int kvm_arm_init(void) + + in_hyp_mode = is_kernel_in_hyp_mode(); + +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (is_virtcca_cvm_enable() && in_hyp_mode) { +- err = kvm_init_tmm(); +- if (err) +- return err; +- } +-#endif ++ if (in_hyp_mode) ++ kvm_init_rme(); + + if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || + cpus_have_final_cap(ARM64_WORKAROUND_1508412)) +diff --git a/arch/arm64/kvm/cca_base.c b/arch/arm64/kvm/cca_base.c +new file mode 100644 +index 000000000000..19999d4a8a06 +--- /dev/null ++++ b/arch/arm64/kvm/cca_base.c +@@ -0,0 +1,123 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2025. Huawei Technologies Co., Ltd. All rights reserved.
++ */ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static int cca_cvm_type; ++static struct cca_operations *g_cca_operations[CCA_CVM_MAX]; ++ ++/* please use 'cca_cvm_type=$type' to enable cca cvm feature */ ++static int __init setup_cca_cvm_type(char *str) ++{ ++ int ret; ++ unsigned int val; ++ ++ if (!str) ++ return 0; ++ ++ ret = kstrtouint(str, 10, &val); ++ if (ret) { ++ pr_warn("Unable to parse cca cvm_type.\n"); ++ } else { ++ if (val >= ARMCCA_CVM && val < CCA_CVM_MAX) ++ cca_cvm_type = val; ++ } ++ return ret; ++} ++early_param("cca_cvm_type", setup_cca_cvm_type); ++ ++int __init cca_operations_register(enum cca_cvm_type type, struct cca_operations *ops) ++{ ++ if (type >= CCA_CVM_MAX) ++ return -EINVAL; ++ ++ g_cca_operations[type] = ops; ++ return 0; ++} ++ ++int kvm_get_cvm_type(void) ++{ ++ return cca_cvm_type; ++} ++ ++void set_cca_cvm_type(int type) ++{ ++ cca_cvm_type = type; ++} ++EXPORT_SYMBOL_GPL(set_cca_cvm_type); ++ ++int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->enable_cap) ++ return g_cca_operations[cca_cvm_type]->enable_cap(kvm, cap); ++ return 0; ++} ++ ++int kvm_init_realm_vm(struct kvm *kvm) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->init_realm_vm) ++ return g_cca_operations[cca_cvm_type]->init_realm_vm(kvm); ++ return 0; ++} ++ ++int kvm_rec_enter(struct kvm_vcpu *vcpu) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->realm_vm_enter) ++ return g_cca_operations[cca_cvm_type]->realm_vm_enter(vcpu); ++ return 0; ++} ++ ++int handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->realm_vm_exit) ++ return g_cca_operations[cca_cvm_type]->realm_vm_exit(vcpu, rec_run_ret); ++ return 0; ++} ++ ++void kvm_destroy_realm(struct kvm *kvm) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->destroy_vm) ++ g_cca_operations[cca_cvm_type]->destroy_vm(kvm); ++} ++ ++int kvm_create_rec(struct kvm_vcpu *vcpu) 
++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->create_vcpu) ++ return g_cca_operations[cca_cvm_type]->create_vcpu(vcpu); ++ return 0; ++} ++ ++void kvm_destroy_rec(struct kvm_vcpu *vcpu) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->destroy_vcpu) ++ g_cca_operations[cca_cvm_type]->destroy_vcpu(vcpu); ++} ++ ++void kvm_init_rme(void) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->init_sel2_hypervisor) ++ g_cca_operations[cca_cvm_type]->init_sel2_hypervisor(); ++} ++ ++int realm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->psci_complete) ++ return g_cca_operations[cca_cvm_type]->psci_complete(calling, target, status); ++ return 0; ++} ++ ++u32 kvm_realm_vgic_nr_lr(void) ++{ ++ if (g_cca_operations[cca_cvm_type] && g_cca_operations[cca_cvm_type]->vgic_nr_lr) ++ return g_cca_operations[cca_cvm_type]->vgic_nr_lr(); ++ return 0; ++} +diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c +index 105f4e00ec8b..f7871996e160 100644 +--- a/arch/arm64/kvm/guest.c ++++ b/arch/arm64/kvm/guest.c +@@ -112,6 +112,24 @@ static u64 core_reg_offset_from_id(u64 id) + return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); + } + ++static bool kvm_realm_validate_core_reg(u64 off) ++{ ++ /* ++ * Note that GPRs can only sometimes be controlled by the VMM. ++ * For PSCI only X0-X6 are used, higher registers are ignored (restored ++ * from the REC). ++ * For HOST_CALL all of X0-X30 are copied to the RsiHostCall structure. ++ * For emulated MMIO X0 is always used. ++ */ ++ switch (off) { ++ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... 
++ KVM_REG_ARM_CORE_REG(regs.regs[30]): ++ case KVM_REG_ARM_CORE_REG(regs.pc): ++ return true; ++ } ++ return false; ++} ++ + static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) + { + int size; +@@ -381,7 +399,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) + if (!vcpu_has_sve(vcpu)) + return -ENOENT; + +- if (kvm_arm_vcpu_sve_finalized(vcpu)) ++ if (kvm_arm_vcpu_sve_finalized(vcpu) || kvm_realm_is_created(vcpu->kvm)) + return -EPERM; /* too late! */ + + if (WARN_ON(vcpu->arch.sve_state)) +@@ -395,7 +413,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) + if (vq_present(vqs, vq)) + max_vq = vq; + +- if (max_vq > sve_vq_from_vl(kvm_sve_max_vl)) ++ if (max_vq > sve_vq_from_vl(kvm_sve_get_max_vl(vcpu->kvm))) + return -EINVAL; + + /* +@@ -639,8 +657,6 @@ static const u64 timer_reg_list[] = { + KVM_REG_ARM_PTIMER_CVAL, + }; + +-#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) +- + static bool is_timer_reg(u64 index) + { + switch (index) { +@@ -655,9 +671,14 @@ static bool is_timer_reg(u64 index) + return false; + } + ++static unsigned long num_timer_regs(struct kvm_vcpu *vcpu) ++{ ++ return kvm_is_realm(vcpu->kvm) ? 
0 : ARRAY_SIZE(timer_reg_list); ++} ++ + static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) + { +- for (int i = 0; i < NUM_TIMER_REGS; i++) { ++ for (int i = 0; i < num_timer_regs(vcpu); i++) { + if (put_user(timer_reg_list[i], uindices)) + return -EFAULT; + uindices++; +@@ -692,11 +713,11 @@ static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) + { + const unsigned int slices = vcpu_sve_slices(vcpu); + +- if (!vcpu_has_sve(vcpu)) ++ if (!vcpu_has_sve(vcpu) || !kvm_arm_vcpu_sve_finalized(vcpu)) + return 0; + +- /* Policed by KVM_GET_REG_LIST: */ +- WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); ++ if (kvm_is_realm(vcpu->kvm)) ++ return 1; /* KVM_REG_ARM64_SVE_VLS */ + + return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */) + + 1; /* KVM_REG_ARM64_SVE_VLS */ +@@ -713,8 +734,8 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, + if (!vcpu_has_sve(vcpu)) + return 0; + +- /* Policed by KVM_GET_REG_LIST: */ +- WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); ++ if (!kvm_arm_vcpu_sve_finalized(vcpu)) ++ return -EPERM; + + /* + * Enumerate this first, so that userspace can save/restore in +@@ -725,6 +746,9 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, + return -EFAULT; + ++num_regs; + ++ if (kvm_is_realm(vcpu->kvm)) ++ return num_regs; ++ + for (i = 0; i < slices; i++) { + for (n = 0; n < SVE_NUM_ZREGS; n++) { + reg = KVM_REG_ARM64_SVE_ZREG(n, i); +@@ -762,7 +786,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) + res += num_sve_regs(vcpu); + res += kvm_arm_num_sys_reg_descs(vcpu); + res += kvm_arm_get_fw_num_regs(vcpu); +- res += NUM_TIMER_REGS; ++ res += num_timer_regs(vcpu); + + return res; + } +@@ -794,7 +818,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) + ret = copy_timer_indices(vcpu, uindices); + if (ret < 0) + return ret; +- uindices += NUM_TIMER_REGS; ++ uindices += num_timer_regs(vcpu); + + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); + } +@@ -819,12 
+843,44 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) + return kvm_arm_sys_reg_get_reg(vcpu, reg); + } + ++#define KVM_REG_ARM_PMCR_EL0 ARM64_SYS_REG(3, 3, 9, 12, 0) ++#define KVM_REG_ARM_ID_AA64DFR0_EL1 ARM64_SYS_REG(3, 0, 0, 5, 0) ++ ++/* ++ * The RMI ABI only enables setting some GPRs and PC. The selection of GPRs ++ * that are available depends on the Realm state and the reason for the last ++ * exit. All other registers are reset to architectural or otherwise defined ++ * reset values by the RMM, except for a few configuration fields that ++ * correspond to Realm parameters. ++ */ ++static bool validate_realm_set_reg(struct kvm_vcpu *vcpu, ++ const struct kvm_one_reg *reg) ++{ ++ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) { ++ u64 off = core_reg_offset_from_id(reg->id); ++ ++ return kvm_realm_validate_core_reg(off); ++ } else { ++ switch (reg->id) { ++ case KVM_REG_ARM_PMCR_EL0: ++ case KVM_REG_ARM_ID_AA64DFR0_EL1: ++ case KVM_REG_ARM64_SVE_VLS: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ + int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) + { + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) + return -EINVAL; + ++ if (_kvm_is_realm(vcpu->kvm) && !validate_realm_set_reg(vcpu, reg)) ++ return -EINVAL; ++ + switch (reg->id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); + case KVM_REG_ARM_FW: +@@ -876,10 +932,34 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; + ++ if (_vcpu_is_rec(vcpu)) { ++ /* Cannot inject SError into a Realm. */ ++ if (serror_pending) ++ return -EINVAL; ++ ++ /* ++ * If a data abort is pending, set the flag and let the RMM ++ * inject an SEA when the REC is scheduled to be run. 
++ */ ++ if (ext_dabt_pending) { ++ /* ++ * Can only inject SEA into a Realm if the previous exit ++ * was due to a data abort of an Unprotected IPA. ++ */ ++ if (!(vcpu->arch.rec->run->enter.flags & REC_ENTER_FLAG_EMULATED_MMIO)) ++ return -EINVAL; ++ ++ vcpu->arch.rec->run->enter.flags &= ~REC_ENTER_FLAG_EMULATED_MMIO; ++ vcpu->arch.rec->run->enter.flags |= REC_ENTER_FLAG_INJECT_SEA; ++ } ++ ++ return 0; ++ } + #ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) + return kvm_cvm_vcpu_set_events(vcpu, serror_pending, ext_dabt_pending); + #endif ++ + if (serror_pending && has_esr) { + if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) + return -EINVAL; +diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c +index f02c7b53c922..02ea4d10a128 100644 +--- a/arch/arm64/kvm/hypercalls.c ++++ b/arch/arm64/kvm/hypercalls.c +@@ -444,14 +444,14 @@ void kvm_arm_teardown_hypercalls(struct kvm *kvm) + + int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) + { +- return ARRAY_SIZE(kvm_arm_fw_reg_ids); ++ return kvm_is_realm(vcpu->kvm) ? 
0 : ARRAY_SIZE(kvm_arm_fw_reg_ids); + } + + int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) + { + int i; + +- for (i = 0; i < ARRAY_SIZE(kvm_arm_fw_reg_ids); i++) { ++ for (i = 0; i < kvm_arm_get_fw_num_regs(vcpu); i++) { + if (put_user(kvm_arm_fw_reg_ids[i], uindices++)) + return -EFAULT; + } +diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c +index 0bd93a5f21ce..7b69484d8580 100644 +--- a/arch/arm64/kvm/inject_fault.c ++++ b/arch/arm64/kvm/inject_fault.c +@@ -165,7 +165,9 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) + */ + void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) + { +- if (vcpu_el1_is_32bit(vcpu)) ++ if (unlikely(_vcpu_is_rec(vcpu))) ++ vcpu->arch.rec->run->enter.flags |= REC_ENTER_FLAG_INJECT_SEA; ++ else if (vcpu_el1_is_32bit(vcpu)) + inject_abt32(vcpu, false, addr); + else + inject_abt64(vcpu, false, addr); +@@ -224,6 +226,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu) + */ + void kvm_inject_undefined(struct kvm_vcpu *vcpu) + { ++ WARN(vcpu_is_rec(vcpu), "Unexpected undefined exception injection to REC"); + if (vcpu_el1_is_32bit(vcpu)) + inject_undef32(vcpu); + else +diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c +index 9abea8e35240..7d95412261f2 100644 +--- a/arch/arm64/kvm/mmio.c ++++ b/arch/arm64/kvm/mmio.c +@@ -6,6 +6,7 @@ + + #include + #include ++#include + #include + #include + +@@ -137,20 +138,29 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + &data); + data = vcpu_data_host_to_guest(vcpu, data, len); +- vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); ++ ++ if (_vcpu_is_rec(vcpu)) ++ vcpu->arch.rec->run->enter.gprs[0] = data; + #ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) { +- ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)-> +- tec_entry.gprs[0] = data; +- } ++ else if (vcpu_is_tec(vcpu)) ++ vcpu->arch.tec.run->enter.gprs[0] = data; + 
#endif ++ else ++ vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); + } + + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. + */ +- kvm_incr_pc(vcpu); ++ if (_vcpu_is_rec(vcpu)) ++ vcpu->arch.rec->run->enter.flags |= REC_ENTER_FLAG_EMULATED_MMIO; ++#ifdef CONFIG_HISI_VIRTCCA_HOST ++ else if (vcpu_is_tec(vcpu)) ++ vcpu->arch.tec.run->enter.flags |= REC_ENTER_FLAG_EMULATED_MMIO; ++#endif ++ else ++ kvm_incr_pc(vcpu); + + return 1; + } +@@ -170,6 +180,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) + * volunteered to do so, and bail out otherwise. + */ + if (!kvm_vcpu_dabt_isvalid(vcpu)) { ++ if (vcpu_is_rec(vcpu)) { ++ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); ++ return 1; ++ } ++ + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &vcpu->kvm->arch.flags)) { + run->exit_reason = KVM_EXIT_ARM_NISV; +@@ -213,12 +228,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + vcpu->mmio_needed = 1; +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) { +- ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_entry.flags |= +- TEC_ENTRY_FLAG_EMUL_MMIO; +- } +-#endif ++ + if (!ret) { + /* We handled the access successfully in the kernel. */ + if (!is_write) +diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c +index 3830aa0b07a0..dd44bc9dd513 100644 +--- a/arch/arm64/kvm/mmu.c ++++ b/arch/arm64/kvm/mmu.c +@@ -312,6 +312,7 @@ static void invalidate_icache_guest_page(void *va, size_t size) + * @start: The intermediate physical base address of the range to unmap + * @size: The size of the area to unmap + * @may_block: Whether or not we are permitted to block ++ * @only_shared: If true then protected mappings should not be unmapped + * + * Clear a range of stage-2 mappings, lowering the various ref-counts. 
Must + * be called while holding mmu_lock (unless for freeing the stage2 pgd before +@@ -319,20 +320,26 @@ static void invalidate_icache_guest_page(void *va, size_t size) + * with things behind our backs. + */ + static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, +- bool may_block) ++ bool may_block, bool only_shared) + { + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); + phys_addr_t end = start + size; + + lockdep_assert_held_write(&kvm->mmu_lock); + WARN_ON(size & ~PAGE_MASK); +- WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap, +- may_block)); ++ ++ if (_kvm_is_realm(kvm)) ++ kvm_realm_unmap_range(kvm, start, size, !only_shared); ++ else ++ WARN_ON(stage2_apply_range(mmu, start, end, ++ kvm_pgtable_stage2_unmap, ++ may_block)); + } + +-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) ++static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size, ++ bool may_block) + { +- __unmap_stage2_range(mmu, start, size, true); ++ __unmap_stage2_range(mmu, start, size, may_block, false); + } + + static void stage2_flush_memslot(struct kvm *kvm, +@@ -341,7 +348,11 @@ static void stage2_flush_memslot(struct kvm *kvm, + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + +- stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush); ++ if (kvm_is_realm(kvm)) ++ kvm_realm_unmap_range(kvm, addr, end - addr, false); ++ else ++ stage2_apply_range_resched(&kvm->arch.mmu, addr, end, ++ kvm_pgtable_stage2_flush); + } + + /** +@@ -855,30 +866,15 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { + .icache_inval_pou = invalidate_icache_guest_page, + }; + +-/** +- * kvm_init_stage2_mmu - Initialise a S2 MMU structure +- * @kvm: The pointer to the KVM structure +- * @mmu: The pointer to the s2 MMU structure +- * @type: The machine type of the virtual machine +- * +- * Allocates only the stage-2 HW PGD 
level table(s). +- * Note we don't need locking here as this is only called when the VM is +- * created, which can only be done once. +- */ +-int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) ++static int kvm_init_ipa_range(struct kvm *kvm, ++ struct kvm_s2_mmu *mmu, unsigned long type) + { + u32 kvm_ipa_limit = get_kvm_ipa_limit(); +- int cpu, err; +- struct kvm_pgtable *pgt; + u64 mmfr0, mmfr1; + u32 phys_shift; + +-#ifdef CONFIG_HISI_VIRTCCA_CODA +- if ((type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) && (!kvm_is_virtcca_cvm(kvm))) +-#else +- if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) +-#endif +- return -EINVAL; ++ if (_kvm_is_realm(kvm)) ++ kvm_ipa_limit = kvm_realm_ipa_limit(); + + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); + if (is_protected_kvm_enabled()) { +@@ -900,11 +896,33 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); + ++ return 0; ++} ++ ++/** ++ * kvm_init_stage2_mmu - Initialise a S2 MMU structure ++ * @kvm: The pointer to the KVM structure ++ * @mmu: The pointer to the s2 MMU structure ++ * @type: The machine type of the virtual machine ++ * ++ * Allocates only the stage-2 HW PGD level table(s). ++ * Note we don't need locking here as this is only called when the VM is ++ * created, which can only be done once. 
++ */ ++int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) ++{ ++ int cpu, err; ++ struct kvm_pgtable *pgt; ++ + if (mmu->pgt != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + ++ err = kvm_init_ipa_range(kvm, mmu, type); ++ if (err) ++ return err; ++ + pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT); + if (!pgt) + return -ENOMEM; +@@ -980,7 +998,8 @@ static void stage2_unmap_memslot(struct kvm *kvm, + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); +- unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); ++ unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start, ++ true); + } + hva = vm_end; + } while (hva < reg_end); +@@ -999,6 +1018,10 @@ void stage2_unmap_vm(struct kvm *kvm) + struct kvm_memory_slot *memslot; + int idx, bkt; + ++ /* For realms this is handled by the RMM so nothing to do here */ ++ if (kvm_is_realm(kvm)) ++ return; ++ + idx = srcu_read_lock(&kvm->srcu); + mmap_read_lock(current->mm); + write_lock(&kvm->mmu_lock); +@@ -1015,10 +1038,25 @@ void stage2_unmap_vm(struct kvm *kvm) + void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) + { + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); +- struct kvm_pgtable *pgt = NULL; ++ struct kvm_pgtable *pgt; + + write_lock(&kvm->mmu_lock); + pgt = mmu->pgt; ++ if (_kvm_is_realm(kvm) && ++ (kvm_realm_state(kvm) != REALM_STATE_DEAD && ++ kvm_realm_state(kvm) != REALM_STATE_NONE)) { ++ unmap_stage2_range(mmu, 0, (~0ULL) & PAGE_MASK, false); ++ write_unlock(&kvm->mmu_lock); ++ kvm_realm_destroy_rtts(kvm, pgt->ia_bits); ++ ++ /* ++ * The physical PGD pages are delegated to the RMM, so cannot ++ * be freed at this point. This function will be called again ++ * from kvm_destroy_realm() after the physical pages have been ++ * returned at which point the memory can be freed. 
++ */ ++ return; ++ } + if (pgt) { + mmu->pgd_phys = 0; + mmu->pgt = NULL; +@@ -1081,6 +1119,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + if (is_protected_kvm_enabled()) + return -EPERM; + ++ /* We don't support mapping special pages into a Realm */ ++ if (kvm_is_realm(kvm)) ++ return -EPERM; ++ + size += offset_in_page(guest_ipa); + guest_ipa &= PAGE_MASK; + +@@ -1397,6 +1439,25 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) + return vma->vm_flags & VM_MTE_ALLOWED; + } + ++static int realm_map_ipa(struct kvm *kvm, phys_addr_t ipa, ++ kvm_pfn_t pfn, unsigned long map_size, ++ enum kvm_pgtable_prot prot, ++ struct kvm_mmu_memory_cache *memcache) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ ++ if (WARN_ON(!(prot & KVM_PGTABLE_PROT_W))) ++ return -EFAULT; ++ ++ ipa = ALIGN_DOWN(ipa, PAGE_SIZE); ++ ++ if (!kvm_realm_is_private_address(realm, ipa)) ++ return realm_map_non_secure(realm, ipa, pfn, map_size, ++ memcache); ++ ++ return realm_map_protected(realm, ipa, pfn, map_size, memcache); ++} ++ + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_memory_slot *memslot, unsigned long hva, + unsigned long fault_status) +@@ -1420,12 +1481,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + + fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); + write_fault = kvm_is_write_fault(vcpu); +-#ifdef CONFIG_HISI_VIRTCCA_CODA +- if (vcpu_is_tec(vcpu)) { ++ ++ /* ++ * Realms cannot map protected pages read-only ++ * FIXME: It should be possible to map unprotected pages read-only ++ */ ++ if (vcpu_is_rec(vcpu)) + write_fault = true; ++ if (vcpu_is_tec(vcpu)) + prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; +- } +-#endif ++ + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); + VM_BUG_ON(write_fault && exec_fault); + vcpu->stat.mabt_exit_stat++; +@@ -1468,6 +1533,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + if (logging_active) { 
+ force_pte = true; + vma_shift = PAGE_SHIFT; ++ } else if (_vcpu_is_rec(vcpu)) { ++ // Force PTE level mappings for realms ++ force_pte = true; ++ vma_shift = PAGE_SHIFT; + } else { + vma_shift = get_vma_page_shift(vma, hva); + } +@@ -1500,7 +1569,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) + fault_ipa &= ~(vma_pagesize - 1); + +- gfn = fault_ipa >> PAGE_SHIFT; ++ gfn = kvm_gpa_from_fault(kvm, fault_ipa) >> PAGE_SHIFT; + mte_allowed = kvm_vma_mte_allowed(vma); + + /* Don't use the VMA after the unlock -- it may have vanished */ +@@ -1549,6 +1618,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + if (exec_fault && device) + return -ENOEXEC; + ++ /* ++ * Adapted from cca-v8 ++ * Since OLK-6.6 does not implement the private_memslot_fault() ++ * that depends on the higher version feature guest memfd. ++ * Here we should handle protected address faults, except for protected devices. 
++ */ ++ if (device && _vcpu_is_rec(vcpu) && ++ kvm_gpa_from_fault(kvm, fault_ipa) == fault_ipa) ++ return -EINVAL; ++ + read_lock(&kvm->mmu_lock); + pgt = vcpu->arch.hw_mmu->pgt; + if (mmu_invalidate_retry(kvm, mmu_seq)) +@@ -1606,6 +1685,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + */ + if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule) + ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); ++ else if (_kvm_is_realm(kvm)) ++ ret = realm_map_ipa(kvm, fault_ipa, pfn, vma_pagesize, ++ prot, memcache); + else + ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, + __pfn_to_phys(pfn), prot, +@@ -1729,7 +1811,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) + + idx = srcu_read_lock(&vcpu->kvm->srcu); + +- gfn = fault_ipa >> PAGE_SHIFT; ++ gfn = kvm_gpa_from_fault(vcpu->kvm, fault_ipa) >> PAGE_SHIFT; + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(vcpu); +@@ -1774,7 +1856,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) + * of the page size. 
+ */ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); +- ret = io_mem_abort(vcpu, fault_ipa); ++ ret = io_mem_abort(vcpu, kvm_gpa_from_fault(vcpu->kvm, fault_ipa)); + goto out_unlock; + } + +@@ -1813,7 +1895,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) + + __unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT, + (range->end - range->start) << PAGE_SHIFT, +- range->may_block); ++ range->may_block, ++ !(range->attr_filter & KVM_FILTER_PRIVATE)); + + return false; + } +@@ -1859,6 +1942,10 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) + if (!kvm->arch.mmu.pgt) + return false; + ++ /* We don't support aging for Realms */ ++ if (kvm_is_realm(kvm)) ++ return true; ++ + return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, + range->start << PAGE_SHIFT, + size, true); +@@ -1871,6 +1958,10 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) + if (!kvm->arch.mmu.pgt) + return false; + ++ /* We don't support aging for Realms */ ++ if (kvm_is_realm(kvm)) ++ return true; ++ + return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, + range->start << PAGE_SHIFT, + size, false); +@@ -2123,7 +2214,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + phys_addr_t size = slot->npages << PAGE_SHIFT; + + write_lock(&kvm->mmu_lock); +- unmap_stage2_range(&kvm->arch.mmu, gpa, size); ++ unmap_stage2_range(&kvm->arch.mmu, gpa, size, true); + write_unlock(&kvm->mmu_lock); + } + +diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c +index 6469a7c51df3..ac7ffa5f007b 100644 +--- a/arch/arm64/kvm/pmu-emul.c ++++ b/arch/arm64/kvm/pmu-emul.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + + #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) +@@ -341,14 +342,13 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) + { + u64 reg = 0; + ++ if (_vcpu_is_rec(vcpu)) ++ return vcpu->arch.rec->run->exit.pmu_ovf_status; + #ifdef CONFIG_HISI_VIRTCCA_HOST +- if 
(vcpu_is_tec(vcpu)) { +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; +- +- reg = run->tec_exit.pmu_ovf_status; +- return reg; +- } ++ if (vcpu_is_tec(vcpu)) ++ return vcpu->arch.tec.run->exit.pmu_ovf_status; + #endif ++ + if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { + reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); +@@ -899,6 +899,55 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) + return true; + } + ++static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu) ++{ ++ lockdep_assert_held(&kvm->arch.config_lock); ++ ++ kvm->arch.arm_pmu = arm_pmu; ++ kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm); ++} ++ ++/** ++ * kvm_arm_set_default_pmu - No PMU set, get the default one. ++ * @kvm: The kvm pointer ++ * ++ * The observant among you will notice that the supported_cpus ++ * mask does not get updated for the default PMU even though it ++ * is quite possible the selected instance supports only a ++ * subset of cores in the system. This is intentional, and ++ * upholds the preexisting behavior on heterogeneous systems ++ * where vCPUs can be scheduled on any core but the guest ++ * counters could stop working. ++ */ ++static int kvm_arm_set_default_pmu(struct kvm *kvm) ++{ ++ struct arm_pmu *arm_pmu = kvm_pmu_probe_armpmu(); ++ ++ if (!arm_pmu) ++ return -ENODEV; ++ ++ kvm_arm_set_pmu(kvm, arm_pmu); ++ return 0; ++} ++ ++/** ++ * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters. ++ * @kvm: The kvm pointer ++ */ ++u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm) ++{ ++ struct arm_pmu *arm_pmu = kvm->arch.arm_pmu; ++ ++ if (kvm_is_realm(kvm)) ++ return kvm_realm_max_pmu_counters(); ++ ++ /* ++ * The arm_pmu->num_events considers the cycle counter as well. ++ * Ignore that and return only the general-purpose counters. 
++ */ ++ return arm_pmu->num_events - 1; ++} ++ + static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) + { + struct kvm *kvm = vcpu->kvm; +@@ -918,7 +967,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) + break; + } + +- kvm->arch.arm_pmu = arm_pmu; ++ kvm_arm_set_pmu(kvm, arm_pmu); + cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus); + ret = 0; + break; +@@ -942,20 +991,10 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) + return -EBUSY; + + if (!kvm->arch.arm_pmu) { +- /* +- * No PMU set, get the default one. +- * +- * The observant among you will notice that the supported_cpus +- * mask does not get updated for the default PMU even though it +- * is quite possible the selected instance supports only a +- * subset of cores in the system. This is intentional, and +- * upholds the preexisting behavior on heterogeneous systems +- * where vCPUs can be scheduled on any core but the guest +- * counters could stop working. 
+- */ +- kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); +- if (!kvm->arch.arm_pmu) +- return -ENODEV; ++ int ret = kvm_arm_set_default_pmu(kvm); ++ ++ if (ret) ++ return ret; + } + + switch (attr->attr) { +@@ -1104,5 +1143,7 @@ u8 kvm_arm_pmu_get_pmuver_limit(void) + */ + u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) + { +- return __vcpu_sys_reg(vcpu, PMCR_EL0); ++ u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); ++ ++ return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N); + } +diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c +index b544418b68ed..750b3899d462 100644 +--- a/arch/arm64/kvm/psci.c ++++ b/arch/arm64/kvm/psci.c +@@ -12,7 +12,6 @@ + + #include + #include +-#include + + #include + #include +@@ -80,10 +79,6 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) + return PSCI_RET_INVALID_PARAMS; + + spin_lock(&vcpu->arch.mp_state_lock); +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- cvm_psci_complete(source_vcpu, vcpu); +-#endif + if (!kvm_arm_vcpu_stopped(vcpu)) { + if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) + ret = PSCI_RET_ALREADY_ON; +@@ -108,6 +103,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) + + reset_state->reset = true; + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); ++ /* ++ * Make sure we issue PSCI_COMPLETE before the VCPU can be ++ * scheduled. ++ */ ++ if (vcpu_is_rec(vcpu)) ++ realm_psci_complete(source_vcpu, vcpu, PSCI_RET_SUCCESS); + + /* + * Make sure the reset request is observed if the RUNNABLE mp_state is +@@ -120,6 +121,11 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) + + out_unlock: + spin_unlock(&vcpu->arch.mp_state_lock); ++ if (vcpu_is_rec(vcpu) && ret != PSCI_RET_SUCCESS) { ++ realm_psci_complete(source_vcpu, vcpu, ++ ret == PSCI_RET_ALREADY_ON ? 
++ PSCI_RET_SUCCESS : PSCI_RET_DENIED); ++ } + return ret; + } + +@@ -146,10 +152,26 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- return cvm_psci_vcpu_affinity_info(vcpu, target_affinity, lowest_affinity_level); +-#endif ++ ++ if (vcpu_is_rec(vcpu)) { ++ struct kvm_vcpu *target_vcpu; ++ ++ /* RMM supports only zero affinity level */ ++ if (lowest_affinity_level != 0) ++ return PSCI_RET_INVALID_PARAMS; ++ ++ target_vcpu = kvm_mpidr_to_vcpu(kvm, target_affinity); ++ if (!target_vcpu) ++ return PSCI_RET_INVALID_PARAMS; ++ ++ /* ++ * Provide the references of the source and target RECs to the ++ * RMM so that the RMM can complete the PSCI request. ++ */ ++ realm_psci_complete(vcpu, target_vcpu, PSCI_RET_SUCCESS); ++ return PSCI_RET_SUCCESS; ++ } ++ + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF +diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c +index 0de1094d4e19..ac6c4f5bf52a 100644 +--- a/arch/arm64/kvm/reset.c ++++ b/arch/arm64/kvm/reset.c +@@ -29,7 +29,6 @@ + #include + #include + #include +-#include + + /* Maximum phys_shift supported for any VM on this host */ + static u32 __ro_after_init kvm_ipa_limit; +@@ -46,7 +45,7 @@ static u32 __ro_after_init kvm_ipa_limit; + #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ + PSR_AA32_I_BIT | PSR_AA32_F_BIT) + +-unsigned int __ro_after_init kvm_sve_max_vl; ++static unsigned int __ro_after_init kvm_sve_max_vl; + + int __init kvm_arm_init_sve(void) + { +@@ -74,12 +73,20 @@ int __init kvm_arm_init_sve(void) + return 0; + } + ++unsigned int kvm_sve_get_max_vl(struct kvm *kvm) ++{ ++ if (kvm_is_realm(kvm)) ++ return kvm_realm_sve_max_vl(); ++ else ++ return kvm_sve_max_vl; ++} ++ + static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) + { + if (!system_supports_sve()) + return 
-EINVAL; + +- vcpu->arch.sve_max_vl = kvm_sve_max_vl; ++ vcpu->arch.sve_max_vl = kvm_sve_get_max_vl(vcpu->kvm); + + /* + * Userspace can still customize the vector lengths by writing +@@ -140,12 +147,12 @@ int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature) + return -EPERM; + + return kvm_vcpu_finalize_sve(vcpu); +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- case KVM_ARM_VCPU_TEC: +- if (!kvm_is_virtcca_cvm(vcpu->kvm)) ++ case KVM_ARM_VCPU_REC: ++ if (!kvm_is_realm(vcpu->kvm) || ++ (kvm_get_cvm_type() == ARMCCA_CVM && !vcpu_is_rec(vcpu))) + return -EINVAL; +- return kvm_finalize_vcpu_tec(vcpu); +-#endif ++ ++ return kvm_create_rec(vcpu); + } + + return -EINVAL; +@@ -156,6 +163,11 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) + if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu)) + return false; + ++ if (_kvm_is_realm(vcpu->kvm) && ++ !(vcpu_is_rec(vcpu) && kvm_arm_rec_finalized(vcpu) && ++ READ_ONCE(vcpu->kvm->arch.realm.state) == REALM_STATE_ACTIVE)) ++ return false; ++ + return true; + } + +@@ -170,10 +182,8 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) + kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); + kfree(sve_state); + kfree(vcpu->arch.ccsidr); +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- kvm_destroy_tec(vcpu); +-#endif ++ kvm_destroy_rec(vcpu); ++ kfree(vcpu->arch.rec); + + #ifdef CONFIG_ARM64_HDBSS + if (vcpu->arch.hdbss.br_el2) { +diff --git a/arch/arm64/kvm/rme-exit.c b/arch/arm64/kvm/rme-exit.c +new file mode 100644 +index 000000000000..83ca2c271f1a +--- /dev/null ++++ b/arch/arm64/kvm/rme-exit.c +@@ -0,0 +1,191 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2023 ARM Ltd. 
++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); ++ ++static int rec_exit_reason_notimpl(struct kvm_vcpu *vcpu) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ ++ vcpu_err(vcpu, "Unhandled exit reason from realm (ESR: %#llx)\n", ++ rec->run->exit.esr); ++ return -ENXIO; ++} ++ ++static int rec_exit_sync_dabt(struct kvm_vcpu *vcpu) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ ++ /* ++ * In the case of a write, copy over gprs[0] to the target GPR, ++ * preparing to handle MMIO write fault. The content to be written has ++ * been saved to gprs[0] by the RMM (even if another register was used ++ * by the guest). In the case of normal memory access this is redundant ++ * (the guest will replay the instruction), but the overhead is ++ * minimal. ++ */ ++ if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) ++ vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), ++ rec->run->exit.gprs[0]); ++ ++ return kvm_handle_guest_abort(vcpu); ++} ++ ++static int rec_exit_sync_iabt(struct kvm_vcpu *vcpu) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ ++ vcpu_err(vcpu, "Unhandled instruction abort (ESR: %#llx).\n", ++ rec->run->exit.esr); ++ return -ENXIO; ++} ++ ++static int rec_exit_sys_reg(struct kvm_vcpu *vcpu) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ unsigned long esr = kvm_vcpu_get_esr(vcpu); ++ int rt = kvm_vcpu_sys_get_rt(vcpu); ++ bool is_write = !(esr & 1); ++ int ret; ++ ++ if (is_write) ++ vcpu_set_reg(vcpu, rt, rec->run->exit.gprs[0]); ++ ++ ret = kvm_handle_sys_reg(vcpu); ++ if (ret > 0 && !is_write) ++ rec->run->enter.gprs[0] = vcpu_get_reg(vcpu, rt); ++ ++ return ret; ++} ++ ++static exit_handler_fn rec_exit_handlers[] = { ++ [0 ... 
ESR_ELx_EC_MAX] = rec_exit_reason_notimpl, ++ [ESR_ELx_EC_SYS64] = rec_exit_sys_reg, ++ [ESR_ELx_EC_DABT_LOW] = rec_exit_sync_dabt, ++ [ESR_ELx_EC_IABT_LOW] = rec_exit_sync_iabt ++}; ++ ++static int rec_exit_psci(struct kvm_vcpu *vcpu) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ int i; ++ ++ for (i = 0; i < REC_RUN_GPRS; i++) ++ vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]); ++ ++ return kvm_smccc_call_handler(vcpu); ++} ++ ++static int rec_exit_ripas_change(struct kvm_vcpu *vcpu) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ struct realm *realm = &kvm->arch.realm; ++ struct realm_rec *rec = vcpu->arch.rec; ++ unsigned long base = rec->run->exit.ripas_base; ++ unsigned long top = rec->run->exit.ripas_top; ++ unsigned long ripas = rec->run->exit.ripas_value; ++ ++ if (!kvm_realm_is_private_address(realm, base) || ++ !kvm_realm_is_private_address(realm, top - 1)) { ++ vcpu_err(vcpu, "Invalid RIPAS_CHANGE for %#lx - %#lx, ripas: %#lx\n", ++ base, top, ripas); ++ return -EINVAL; ++ } ++ ++ return 1; ++} ++ ++static int rec_exit_host_call(struct kvm_vcpu *vcpu) ++{ ++ int i; ++ struct realm_rec *rec = vcpu->arch.rec; ++ ++ vcpu->stat.hvc_exit_stat++; ++ ++ for (i = 0; i < REC_RUN_GPRS; i++) ++ vcpu_set_reg(vcpu, i, rec->run->exit.gprs[i]); ++ ++ return kvm_smccc_call_handler(vcpu); ++} ++ ++static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ ++ __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = rec->run->exit.cntv_ctl; ++ __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = rec->run->exit.cntv_cval; ++ __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = rec->run->exit.cntp_ctl; ++ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = rec->run->exit.cntp_cval; ++ ++ kvm_realm_timers_update(vcpu); ++} ++ ++/* ++ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on ++ * proper exit to userspace. 
++ */ ++int _handle_rec_exit(struct kvm_vcpu *vcpu, int rec_run_ret) ++{ ++ struct realm_rec *rec = vcpu->arch.rec; ++ u8 esr_ec = ESR_ELx_EC(rec->run->exit.esr); ++ unsigned long status, index; ++ ++ status = RMI_RETURN_STATUS(rec_run_ret); ++ index = RMI_RETURN_INDEX(rec_run_ret); ++ ++ /* ++ * If a PSCI_SYSTEM_OFF request raced with a vcpu executing, we might ++ * see the following status code and index indicating an attempt to run ++ * a REC when the RD state is SYSTEM_OFF. In this case, we just need to ++ * return to user space which can deal with the system event or will try ++ * to run the KVM VCPU again, at which point we will no longer attempt ++ * to enter the Realm because we will have a sleep request pending on ++ * the VCPU as a result of KVM's PSCI handling. ++ */ ++ if (status == RMI_ERROR_REALM && index == 1) { ++ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; ++ return 0; ++ } ++ ++ if (rec_run_ret) ++ return -ENXIO; ++ ++ vcpu->arch.fault.esr_el2 = rec->run->exit.esr; ++ vcpu->arch.fault.far_el2 = rec->run->exit.far; ++ vcpu->arch.fault.hpfar_el2 = rec->run->exit.hpfar; ++ ++ update_arch_timer_irq_lines(vcpu); ++ ++ /* Reset the emulation flags for the next run of the REC */ ++ rec->run->enter.flags = 0; ++ ++ switch (rec->run->exit.exit_reason) { ++ case RMI_EXIT_SYNC: ++ return rec_exit_handlers[esr_ec](vcpu); ++ case RMI_EXIT_IRQ: ++ case RMI_EXIT_FIQ: ++ return 1; ++ case RMI_EXIT_PSCI: ++ return rec_exit_psci(vcpu); ++ case RMI_EXIT_RIPAS_CHANGE: ++ return rec_exit_ripas_change(vcpu); ++ case RMI_EXIT_HOST_CALL: ++ return rec_exit_host_call(vcpu); ++ } ++ ++ kvm_pr_unimpl("Unsupported exit reason: %u\n", ++ rec->run->exit.exit_reason); ++ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ++ return 0; ++} +diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c +new file mode 100644 +index 000000000000..9095a1573ecb +--- /dev/null ++++ b/arch/arm64/kvm/rme.c +@@ -0,0 +1,1723 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2023 
ARM Ltd. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++static unsigned long rmm_feat_reg0; ++ ++#define RMM_PAGE_SHIFT 12 ++#define RMM_PAGE_SIZE BIT(RMM_PAGE_SHIFT) ++ ++#define RMM_RTT_BLOCK_LEVEL 2 ++#define RMM_RTT_MAX_LEVEL 3 ++ ++/* See ARM64_HW_PGTABLE_LEVEL_SHIFT() */ ++#define RMM_RTT_LEVEL_SHIFT(l) \ ++ ((RMM_PAGE_SHIFT - 3) * (4 - (l)) + 3) ++#define RMM_L2_BLOCK_SIZE BIT(RMM_RTT_LEVEL_SHIFT(2)) ++ ++static inline unsigned long rme_rtt_level_mapsize(int level) ++{ ++ if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) ++ return RMM_PAGE_SIZE; ++ ++ return (1UL << RMM_RTT_LEVEL_SHIFT(level)); ++} ++ ++static bool rme_has_feature(unsigned long feature) ++{ ++ return !!u64_get_bits(rmm_feat_reg0, feature); ++} ++ ++bool kvm_rme_supports_sve(void) ++{ ++ return rme_has_feature(RMI_FEATURE_REGISTER_0_SVE_EN); ++} ++ ++static int rmi_check_version(void) ++{ ++ struct arm_smccc_res res; ++ unsigned short version_major, version_minor; ++ unsigned long host_version = RMI_ABI_VERSION(RMI_ABI_MAJOR_VERSION, ++ RMI_ABI_MINOR_VERSION); ++ ++ arm_smccc_1_1_invoke(SMC_RMI_VERSION, host_version, &res); ++ ++ if (res.a0 == SMCCC_RET_NOT_SUPPORTED) ++ return -ENXIO; ++ ++ version_major = RMI_ABI_VERSION_GET_MAJOR(res.a1); ++ version_minor = RMI_ABI_VERSION_GET_MINOR(res.a1); ++ ++ if (res.a0 != RMI_SUCCESS) { ++ unsigned short high_version_major, high_version_minor; ++ ++ high_version_major = RMI_ABI_VERSION_GET_MAJOR(res.a2); ++ high_version_minor = RMI_ABI_VERSION_GET_MINOR(res.a2); ++ ++ kvm_err("Unsupported RMI ABI (v%d.%d - v%d.%d) we want v%d.%d\n", ++ version_major, version_minor, ++ high_version_major, high_version_minor, ++ RMI_ABI_MAJOR_VERSION, ++ RMI_ABI_MINOR_VERSION); ++ return -ENXIO; ++ } ++ ++ kvm_info("RMI ABI version %d.%d\n", version_major, version_minor); ++ ++ return 0; ++} ++ ++u32 kvm_realm_ipa_limit(void) ++{ ++ return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ); ++} ++ ++u32 
_kvm_realm_vgic_nr_lr(void) ++{ ++ return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_GICV3_NUM_LRS); ++} ++ ++u8 kvm_realm_max_pmu_counters(void) ++{ ++ return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_PMU_NUM_CTRS); ++} ++ ++unsigned int kvm_realm_sve_max_vl(void) ++{ ++ return sve_vl_from_vq(u64_get_bits(rmm_feat_reg0, ++ RMI_FEATURE_REGISTER_0_SVE_VL) + 1); ++} ++ ++u64 kvm_realm_reset_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val) ++{ ++ u32 bps = u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_BPS); ++ u32 wps = u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_NUM_WPS); ++ u32 ctx_cmps; ++ ++ if (!kvm_is_realm(vcpu->kvm)) ++ return val; ++ ++ /* Ensure CTX_CMPs is still valid */ ++ ctx_cmps = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, val); ++ ctx_cmps = min(bps, ctx_cmps); ++ ++ val &= ~(ID_AA64DFR0_EL1_BRPs_MASK | ID_AA64DFR0_EL1_WRPs_MASK | ++ ID_AA64DFR0_EL1_CTX_CMPs); ++ val |= FIELD_PREP(ID_AA64DFR0_EL1_BRPs_MASK, bps) | ++ FIELD_PREP(ID_AA64DFR0_EL1_WRPs_MASK, wps) | ++ FIELD_PREP(ID_AA64DFR0_EL1_CTX_CMPs, ctx_cmps); ++ ++ return val; ++} ++ ++static int get_start_level(struct realm *realm) ++{ ++ return 4 - ((realm->ia_bits - 8) / (RMM_PAGE_SHIFT - 3)); ++} ++ ++static int find_map_level(struct realm *realm, ++ unsigned long start, ++ unsigned long end) ++{ ++ int level = RMM_RTT_MAX_LEVEL; ++ ++ while (level > get_start_level(realm)) { ++ unsigned long map_size = rme_rtt_level_mapsize(level - 1); ++ ++ if (!IS_ALIGNED(start, map_size) || ++ (start + map_size) > end) ++ break; ++ ++ level--; ++ } ++ ++ return level; ++} ++ ++static phys_addr_t alloc_delegated_granule(struct kvm_mmu_memory_cache *mc) ++{ ++ phys_addr_t phys; ++ void *virt; ++ ++ if (mc) ++ virt = kvm_mmu_memory_cache_alloc(mc); ++ else ++ virt = (void *)__get_free_page(GFP_KERNEL_ACCOUNT); ++ ++ if (!virt) ++ return PHYS_ADDR_MAX; ++ ++ phys = virt_to_phys(virt); ++ ++ if (rmi_granule_delegate(phys)) { ++ free_page((unsigned long)virt); ++ ++ return 
PHYS_ADDR_MAX; ++ } ++ ++ kvm_account_pgtable_pages(virt, 1); ++ ++ return phys; ++} ++ ++static void free_delegated_granule(phys_addr_t phys) ++{ ++ if (WARN_ON(rmi_granule_undelegate(phys))) { ++ /* Undelegate failed: leak the page */ ++ return; ++ } ++ ++ kvm_account_pgtable_pages(phys_to_virt(phys), -1); ++ ++ free_page((unsigned long)phys_to_virt(phys)); ++} ++ ++int _realm_psci_complete(struct kvm_vcpu *source, struct kvm_vcpu *target, ++ unsigned long status) ++{ ++ int ret; ++ ++ ret = rmi_psci_complete(virt_to_phys(source->arch.rec->rec_page), ++ virt_to_phys(target->arch.rec->rec_page), ++ status); ++ if (ret) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int realm_rtt_create(struct realm *realm, ++ unsigned long addr, ++ int level, ++ phys_addr_t phys) ++{ ++ addr = ALIGN_DOWN(addr, rme_rtt_level_mapsize(level - 1)); ++ return rmi_rtt_create(virt_to_phys(realm->rd), phys, addr, level); ++} ++ ++static int realm_rtt_fold(struct realm *realm, ++ unsigned long addr, ++ int level, ++ phys_addr_t *rtt_granule) ++{ ++ unsigned long out_rtt; ++ int ret; ++ ++ ret = rmi_rtt_fold(virt_to_phys(realm->rd), addr, level, &out_rtt); ++ ++ if (RMI_RETURN_STATUS(ret) == RMI_SUCCESS && rtt_granule) ++ *rtt_granule = out_rtt; ++ ++ return ret; ++} ++ ++static int realm_destroy_private_granule(struct realm *realm, ++ unsigned long ipa, ++ unsigned long *next_addr, ++ phys_addr_t *out_rtt) ++{ ++ unsigned long rd = virt_to_phys(realm->rd); ++ unsigned long rtt_addr; ++ phys_addr_t rtt; ++ int ret; ++ ++retry: ++ ret = rmi_data_destroy(rd, ipa, &rtt_addr, next_addr); ++ if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { ++ if (*next_addr > ipa) ++ return 0; /* UNASSIGNED */ ++ rtt = alloc_delegated_granule(NULL); ++ if (WARN_ON(rtt == PHYS_ADDR_MAX)) ++ return -ENOMEM; ++ /* ++ * ASSIGNED - ipa is mapped as a block, so split. 
The index ++ * from the return code should be 2 otherwise it appears ++ * there's a huge page bigger than KVM currently supports ++ */ ++ WARN_ON(RMI_RETURN_INDEX(ret) != 2); ++ ret = realm_rtt_create(realm, ipa, 3, rtt); ++ if (WARN_ON(ret)) { ++ free_delegated_granule(rtt); ++ return -ENXIO; ++ } ++ goto retry; ++ } else if (WARN_ON(ret)) { ++ return -ENXIO; ++ } ++ ++ ret = rmi_granule_undelegate(rtt_addr); ++ if (WARN_ON(ret)) ++ return -ENXIO; ++ ++ *out_rtt = rtt_addr; ++ ++ return 0; ++} ++ ++static int realm_unmap_private_page(struct realm *realm, ++ unsigned long ipa, ++ unsigned long *next_addr) ++{ ++ unsigned long end = ALIGN(ipa + 1, PAGE_SIZE); ++ unsigned long addr; ++ phys_addr_t out_rtt = PHYS_ADDR_MAX; ++ int ret; ++ ++ for (addr = ipa; addr < end; addr = *next_addr) { ++ ret = realm_destroy_private_granule(realm, addr, next_addr, ++ &out_rtt); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void realm_unmap_shared_range(struct kvm *kvm, ++ int level, ++ unsigned long start, ++ unsigned long end) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ unsigned long rd = virt_to_phys(realm->rd); ++ ssize_t map_size = rme_rtt_level_mapsize(level); ++ unsigned long next_addr, addr; ++ unsigned long shared_bit = BIT(realm->ia_bits - 1); ++ ++ if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) ++ return; ++ ++ start |= shared_bit; ++ end |= shared_bit; ++ ++ for (addr = start; addr < end; addr = next_addr) { ++ unsigned long align_addr = ALIGN(addr, map_size); ++ int ret; ++ ++ next_addr = ALIGN(addr + 1, map_size); ++ ++ if (align_addr != addr || next_addr > end) { ++ /* Need to recurse deeper */ ++ if (addr < align_addr) ++ next_addr = align_addr; ++ realm_unmap_shared_range(kvm, level + 1, addr, ++ min(next_addr, end)); ++ continue; ++ } ++ ++ ret = rmi_rtt_unmap_unprotected(rd, addr, level, &next_addr); ++ switch (RMI_RETURN_STATUS(ret)) { ++ case RMI_SUCCESS: ++ break; ++ case RMI_ERROR_RTT: ++ if (next_addr == addr) { ++ /* ++ * There's a mapping 
here, but it's not a block ++ * mapping, so reset next_addr to the next block ++ * boundary and recurse to clear out the pages ++ * one level deeper. ++ */ ++ next_addr = ALIGN(addr + 1, map_size); ++ realm_unmap_shared_range(kvm, level + 1, addr, ++ next_addr); ++ } ++ break; ++ default: ++ WARN_ON(1); ++ return; ++ } ++ ++ cond_resched_rwlock_write(&kvm->mmu_lock); ++ } ++} ++ ++static int realm_init_sve_param(struct kvm *kvm, struct realm_params *params) ++{ ++ int ret = 0; ++ unsigned long i; ++ struct kvm_vcpu *vcpu; ++ int vl, last_vl = -1; ++ ++ /* ++ * Get the preferred SVE configuration, set by userspace with the ++ * KVM_ARM_VCPU_SVE feature and KVM_REG_ARM64_SVE_VLS pseudo-register. ++ */ ++ kvm_for_each_vcpu(i, vcpu, kvm) { ++ mutex_lock(&vcpu->mutex); ++ if (vcpu_has_sve(vcpu)) { ++ if (!kvm_arm_vcpu_sve_finalized(vcpu)) ++ ret = -EINVAL; ++ vl = vcpu->arch.sve_max_vl; ++ } else { ++ vl = 0; ++ } ++ mutex_unlock(&vcpu->mutex); ++ if (ret) ++ return ret; ++ ++ /* We need all vCPUs to have the same SVE config */ ++ if (last_vl >= 0 && last_vl != vl) ++ return -EINVAL; ++ ++ last_vl = vl; ++ } ++ ++ if (last_vl > 0) { ++ params->sve_vl = sve_vq_from_vl(last_vl) - 1; ++ params->flags |= RMI_REALM_PARAM_FLAG_SVE; ++ } ++ return 0; ++} ++ ++/* Calculate the number of s2 root rtts needed */ ++static int realm_num_root_rtts(struct realm *realm) ++{ ++ unsigned int ipa_bits = realm->ia_bits; ++ unsigned int levels = 3 - get_start_level(realm); ++ unsigned int sl_ipa_bits = (levels + 1) * (RMM_PAGE_SHIFT - 3) + ++ RMM_PAGE_SHIFT; ++ ++ if (sl_ipa_bits >= ipa_bits) ++ return 1; ++ ++ return 1 << (ipa_bits - sl_ipa_bits); ++} ++ ++static int realm_create_rd(struct kvm *kvm) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ struct realm_params *params = realm->params; ++ void *rd = NULL; ++ phys_addr_t rd_phys, params_phys; ++ size_t pgd_size = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); ++ u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1); ++ int i, r; ++ int 
rtt_num_start; ++ ++ realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr); ++ rtt_num_start = realm_num_root_rtts(realm); ++ ++ if (WARN_ON(realm->rd || !realm->params)) ++ return -EEXIST; ++ ++ if (pgd_size / RMM_PAGE_SIZE < rtt_num_start) ++ return -EINVAL; ++ ++ rd = (void *)__get_free_page(GFP_KERNEL); ++ if (!rd) ++ return -ENOMEM; ++ ++ rd_phys = virt_to_phys(rd); ++ if (rmi_granule_delegate(rd_phys)) { ++ r = -ENXIO; ++ goto free_rd; ++ } ++ ++ for (i = 0; i < pgd_size; i += RMM_PAGE_SIZE) { ++ phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i; ++ ++ if (rmi_granule_delegate(pgd_phys)) { ++ r = -ENXIO; ++ goto out_undelegate_tables; ++ } ++ } ++ ++ params->s2sz = VTCR_EL2_IPA(kvm->arch.vtcr); ++ params->rtt_level_start = get_start_level(realm); ++ params->rtt_num_start = rtt_num_start; ++ params->rtt_base = kvm->arch.mmu.pgd_phys; ++ params->vmid = realm->vmid; ++ params->num_bps = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); ++ params->num_wps = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); ++ ++ if (kvm->arch.arm_pmu) { ++ params->pmu_num_ctrs = kvm->arch.pmcr_n; ++ params->flags |= RMI_REALM_PARAM_FLAG_PMU; ++ } ++ ++ r = realm_init_sve_param(kvm, params); ++ if (r) ++ goto out_undelegate_tables; ++ ++ params_phys = virt_to_phys(params); ++ ++ if (rmi_realm_create(rd_phys, params_phys)) { ++ r = -ENXIO; ++ goto out_undelegate_tables; ++ } ++ ++ if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) { ++ WARN_ON(rmi_realm_destroy(rd_phys)); ++ goto out_undelegate_tables; ++ } ++ ++ realm->rd = rd; ++ ++ return 0; ++ ++out_undelegate_tables: ++ while (i > 0) { ++ i -= RMM_PAGE_SIZE; ++ ++ phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i; ++ ++ if (WARN_ON(rmi_granule_undelegate(pgd_phys))) { ++ /* Leak the pages if they cannot be returned */ ++ kvm->arch.mmu.pgt = NULL; ++ break; ++ } ++ } ++ if (WARN_ON(rmi_granule_undelegate(rd_phys))) { ++ /* Leak the page if it isn't returned */ ++ return r; ++ } ++free_rd: ++ free_page((unsigned long)rd); ++ return r; ++} ++ 
++static int realm_rtt_destroy(struct realm *realm, unsigned long addr, ++ int level, phys_addr_t *rtt_granule, ++ unsigned long *next_addr) ++{ ++ unsigned long out_rtt; ++ int ret; ++ ++ ret = rmi_rtt_destroy(virt_to_phys(realm->rd), addr, level, ++ &out_rtt, next_addr); ++ ++ *rtt_granule = out_rtt; ++ ++ return ret; ++} ++ ++static int realm_create_rtt_levels(struct realm *realm, ++ unsigned long ipa, ++ int level, ++ int max_level, ++ struct kvm_mmu_memory_cache *mc) ++{ ++ if (level == max_level) ++ return 0; ++ ++ while (level++ < max_level) { ++ phys_addr_t rtt = alloc_delegated_granule(mc); ++ int ret; ++ ++ if (rtt == PHYS_ADDR_MAX) ++ return -ENOMEM; ++ ++ ret = realm_rtt_create(realm, ipa, level, rtt); ++ ++ if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT && ++ RMI_RETURN_INDEX(ret) == level - 1) { ++ /* The RTT already exists, continue */ ++ continue; ++ } ++ if (ret) { ++ WARN(1, "Failed to create RTT at level %d: %d\n", ++ level, ret); ++ free_delegated_granule(rtt); ++ return -ENXIO; ++ } ++ } ++ ++ return 0; ++} ++ ++static int realm_tear_down_rtt_level(struct realm *realm, int level, ++ unsigned long start, unsigned long end) ++{ ++ ssize_t map_size; ++ unsigned long addr, next_addr; ++ ++ if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) ++ return -EINVAL; ++ ++ map_size = rme_rtt_level_mapsize(level - 1); ++ ++ for (addr = start; addr < end; addr = next_addr) { ++ phys_addr_t rtt_granule; ++ int ret; ++ unsigned long align_addr = ALIGN(addr, map_size); ++ ++ next_addr = ALIGN(addr + 1, map_size); ++ ++ if (next_addr > end || align_addr != addr) { ++ /* ++ * The target range is smaller than what this level ++ * covers, recurse deeper. 
/*
 * Tear down the RTTs covering [start, end), beginning one level below
 * the realm's starting level.
 *
 * Returns whatever realm_tear_down_rtt_level() returns.
 */
static int realm_tear_down_rtt_range(struct realm *realm,
				     unsigned long start, unsigned long end)
{
	int first_level = get_start_level(realm) + 1;

	return realm_tear_down_rtt_level(realm, first_level, start, end);
}
++ */ ++static int realm_fold_rtt_level(struct realm *realm, int level, ++ unsigned long start, unsigned long end) ++{ ++ int not_folded = 0; ++ ssize_t map_size; ++ unsigned long addr, next_addr; ++ ++ if (WARN_ON(level > RMM_RTT_MAX_LEVEL)) ++ return -EINVAL; ++ ++ map_size = rme_rtt_level_mapsize(level - 1); ++ ++ for (addr = start; addr < end; addr = next_addr) { ++ phys_addr_t rtt_granule; ++ int ret; ++ unsigned long align_addr = ALIGN(addr, map_size); ++ ++ next_addr = ALIGN(addr + 1, map_size); ++ ++ ret = realm_rtt_fold(realm, align_addr, level, &rtt_granule); ++ ++ switch (RMI_RETURN_STATUS(ret)) { ++ case RMI_SUCCESS: ++ free_delegated_granule(rtt_granule); ++ break; ++ case RMI_ERROR_RTT: ++ if (level == RMM_RTT_MAX_LEVEL || ++ RMI_RETURN_INDEX(ret) < level) { ++ not_folded++; ++ break; ++ } ++ /* Recurse a level deeper */ ++ ret = realm_fold_rtt_level(realm, ++ level + 1, ++ addr, ++ next_addr); ++ if (ret < 0) ++ return ret; ++ else if (ret == 0) ++ /* Try again at this level */ ++ next_addr = addr; ++ break; ++ default: ++ WARN_ON(1); ++ return -ENXIO; ++ } ++ } ++ ++ return not_folded; ++} ++ ++void kvm_realm_destroy_rtts(struct kvm *kvm, u32 ia_bits) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ ++ WARN_ON(realm_tear_down_rtt_range(realm, 0, (1UL << ia_bits))); ++} ++ ++static void realm_unmap_private_range(struct kvm *kvm, ++ unsigned long start, ++ unsigned long end) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ unsigned long next_addr, addr; ++ int ret; ++ ++ for (addr = start; addr < end; addr = next_addr) { ++ ret = realm_unmap_private_page(realm, addr, &next_addr); ++ ++ if (ret) ++ break; ++ } ++ ++ realm_fold_rtt_level(realm, get_start_level(realm) + 1, ++ start, end); ++} ++ ++void kvm_realm_unmap_range(struct kvm *kvm, unsigned long start, ++ unsigned long size, bool unmap_private) ++{ ++ unsigned long end = start + size; ++ struct realm *realm = &kvm->arch.realm; ++ ++ end = min(BIT(realm->ia_bits - 1), end); ++ ++ if 
(realm->state == REALM_STATE_NONE) ++ return; ++ ++ realm_unmap_shared_range(kvm, find_map_level(realm, start, end), ++ start, end); ++ if (unmap_private) ++ realm_unmap_private_range(kvm, start, end); ++} ++ ++static int realm_create_protected_data_granule(struct realm *realm, ++ unsigned long ipa, ++ phys_addr_t dst_phys, ++ phys_addr_t src_phys, ++ unsigned long flags) ++{ ++ phys_addr_t rd = virt_to_phys(realm->rd); ++ int ret; ++ ++ if (rmi_granule_delegate(dst_phys)) ++ return -ENXIO; ++ ++ ret = rmi_data_create(rd, dst_phys, ipa, src_phys, flags); ++ if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { ++ /* Create missing RTTs and retry */ ++ int level = RMI_RETURN_INDEX(ret); ++ ++ WARN_ON(level == RMM_RTT_MAX_LEVEL); ++ ++ ret = realm_create_rtt_levels(realm, ipa, level, ++ RMM_RTT_MAX_LEVEL, NULL); ++ if (ret) ++ return -EIO; ++ ++ ret = rmi_data_create(rd, dst_phys, ipa, src_phys, flags); ++ } ++ if (ret) ++ return -EIO; ++ ++ return 0; ++} ++ ++static int realm_create_protected_data_page(struct realm *realm, ++ unsigned long ipa, ++ struct page *dst_page, ++ struct page *src_page, ++ unsigned long flags) ++{ ++ unsigned long rd = virt_to_phys(realm->rd); ++ phys_addr_t dst_phys, src_phys; ++ bool undelegate_failed = false; ++ int ret, offset; ++ ++ dst_phys = page_to_phys(dst_page); ++ src_phys = page_to_phys(src_page); ++ copy_page(page_address(src_page), page_address(dst_page)); ++ ++ for (offset = 0; offset < PAGE_SIZE; offset += RMM_PAGE_SIZE) { ++ ++ ret = realm_create_protected_data_granule(realm, ++ ipa, ++ dst_phys, ++ src_phys, ++ flags); ++ if (ret) ++ goto err; ++ ++ ipa += RMM_PAGE_SIZE; ++ dst_phys += RMM_PAGE_SIZE; ++ src_phys += RMM_PAGE_SIZE; ++ } ++ ++ return 0; ++ ++err: ++ if (ret == -EIO) { ++ /* current offset needs undelegating */ ++ if (WARN_ON(rmi_granule_undelegate(dst_phys))) ++ undelegate_failed = true; ++ } ++ while (offset > 0) { ++ ipa -= RMM_PAGE_SIZE; ++ offset -= RMM_PAGE_SIZE; ++ dst_phys -= RMM_PAGE_SIZE; ++ ++ 
/*
 * Map @map_size bytes of host memory starting at @pfn into the realm's
 * protected address space at @ipa.
 *
 * Each RMM granule in the range is delegated and then added with
 * rmi_data_create_unknown(). Missing RTTs are created on demand, using
 * @memcache as the allocation source. When @map_size is exactly
 * RMM_L2_BLOCK_SIZE the level-3 table is folded afterwards via
 * fold_rtt().
 *
 * Returns 0 on success (and also when a delegate call fails, which is
 * treated as a lost race with another VCPU), -EINVAL for misaligned
 * arguments, or -ENXIO after unwinding the granules mapped so far.
 */
int realm_map_protected(struct realm *realm,
			unsigned long ipa,
			kvm_pfn_t pfn,
			unsigned long map_size,
			struct kvm_mmu_memory_cache *memcache)
{
	phys_addr_t phys = __pfn_to_phys(pfn);
	phys_addr_t rd = virt_to_phys(realm->rd);
	unsigned long base_ipa = ipa;
	unsigned long size;
	int map_level;
	int ret = 0;

	if (WARN_ON(!IS_ALIGNED(map_size, RMM_PAGE_SIZE)))
		return -EINVAL;

	if (WARN_ON(!IS_ALIGNED(ipa, map_size)))
		return -EINVAL;

	if (IS_ALIGNED(map_size, RMM_L2_BLOCK_SIZE))
		map_level = 2;
	else
		map_level = 3;

	if (map_level < RMM_RTT_MAX_LEVEL) {
		/*
		 * A temporary RTT is needed during the map, precreate it,
		 * however if there is an error (e.g. missing parent tables)
		 * this will be handled below.
		 */
		realm_create_rtt_levels(realm, ipa, map_level,
					RMM_RTT_MAX_LEVEL, memcache);
	}

	/* 'size' counts the bytes mapped so far; the unwind path relies on it */
	for (size = 0; size < map_size; size += RMM_PAGE_SIZE) {
		if (rmi_granule_delegate(phys)) {
			/*
			 * It's likely we raced with another VCPU on the same
			 * fault. Assume the other VCPU has handled the fault
			 * and return to the guest.
			 */
			return 0;
		}

		ret = rmi_data_create_unknown(rd, phys, ipa);

		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
			/* Create missing RTTs and retry */
			int level = RMI_RETURN_INDEX(ret);

			WARN_ON(level == RMM_RTT_MAX_LEVEL);

			ret = realm_create_rtt_levels(realm, ipa, level,
						      RMM_RTT_MAX_LEVEL,
						      memcache);
			if (ret)
				goto err_undelegate;

			ret = rmi_data_create_unknown(rd, phys, ipa);
		}

		if (WARN_ON(ret))
			goto err_undelegate;

		phys += RMM_PAGE_SIZE;
		ipa += RMM_PAGE_SIZE;
	}

	if (map_size == RMM_L2_BLOCK_SIZE) {
		ret = fold_rtt(realm, base_ipa, map_level + 1);
		if (WARN_ON(ret))
			goto err;
	}

	return 0;

err_undelegate:
	/* The current granule was delegated but never mapped */
	if (WARN_ON(rmi_granule_undelegate(phys))) {
		/* Page can't be returned to NS world so is lost */
		get_page(phys_to_page(phys));
	}
err:
	/* Walk backwards over the granules that were mapped successfully */
	while (size > 0) {
		unsigned long data, top;

		phys -= RMM_PAGE_SIZE;
		size -= RMM_PAGE_SIZE;
		ipa -= RMM_PAGE_SIZE;

		WARN_ON(rmi_data_destroy(rd, ipa, &data, &top));

		if (WARN_ON(rmi_granule_undelegate(phys))) {
			/* Page can't be returned to NS world so is lost */
			get_page(phys_to_page(phys));
		}
	}
	return -ENXIO;
}
++ */ ++ unsigned long desc = phys | ++ PTE_S2_MEMATTR(MT_S2_FWB_NORMAL) | ++ (3 << 6); ++ ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc); ++ ++ if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { ++ /* Create missing RTTs and retry */ ++ int level = RMI_RETURN_INDEX(ret); ++ ++ ret = realm_create_rtt_levels(realm, ipa, level, ++ map_level, memcache); ++ if (ret) ++ return -ENXIO; ++ ++ ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc); ++ } ++ /* ++ * RMI_ERROR_RTT can be reported for two reasons: either the ++ * RTT tables are not there, or there is an RTTE already ++ * present for the address. The call to ++ * realm_create_rtt_levels() above handles the first case, and ++ * in the second case this indicates that another thread has ++ * already populated the RTTE for us, so we can ignore the ++ * error and continue. ++ */ ++ if (ret && RMI_RETURN_STATUS(ret) != RMI_ERROR_RTT) ++ return -ENXIO; ++ ++ ipa += map_size; ++ phys += map_size; ++ } ++ ++ return 0; ++} ++ ++static int populate_region(struct kvm *kvm, ++ phys_addr_t ipa_base, ++ phys_addr_t ipa_end, ++ unsigned long data_flags) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ struct kvm_memory_slot *memslot; ++ gfn_t base_gfn, end_gfn; ++ int idx; ++ phys_addr_t ipa = ipa_base; ++ struct page *tmp_page; ++ int ret = 0; ++ ++ base_gfn = gpa_to_gfn(ipa_base); ++ end_gfn = gpa_to_gfn(ipa_end); ++ ++ idx = srcu_read_lock(&kvm->srcu); ++ memslot = gfn_to_memslot(kvm, base_gfn); ++ if (!memslot) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ /* We require the region to be contained within a single memslot */ ++ if (memslot->base_gfn + memslot->npages < end_gfn) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ tmp_page = alloc_page(GFP_KERNEL); ++ if (!tmp_page) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ mmap_read_lock(current->mm); ++ ++ while (ipa < ipa_end) { ++ struct vm_area_struct *vma; ++ unsigned long hva; ++ struct page *page; ++ kvm_pfn_t pfn; ++ ++ hva = gfn_to_hva_memslot(memslot, 
gpa_to_gfn(ipa)); ++ vma = vma_lookup(current->mm, hva); ++ if (!vma) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ pfn = gfn_to_pfn_memslot(memslot, gpa_to_gfn(ipa)); ++ ++ if (is_error_pfn(pfn)) { ++ ret = -EFAULT; ++ break; ++ } ++ ++ page = pfn_to_page(pfn); ++ ++ ret = realm_create_protected_data_page(realm, ipa, ++ page, ++ tmp_page, ++ data_flags); ++ if (ret) { ++ kvm_release_page_clean(page); ++ break; ++ } ++ ++ ipa += PAGE_SIZE; ++ kvm_release_pfn_dirty(pfn); ++ } ++out: ++ mmap_read_unlock(current->mm); ++ __free_page(tmp_page); ++ srcu_read_unlock(&kvm->srcu, idx); ++ return ret; ++} ++ ++static int kvm_populate_realm(struct kvm *kvm, ++ struct arm_rme_populate_realm *args) ++{ ++ phys_addr_t ipa_base, ipa_end; ++ unsigned long data_flags = 0; ++ ++ if (kvm_realm_state(kvm) != REALM_STATE_NEW) ++ return -EPERM; ++ ++ if (!IS_ALIGNED(args->base, PAGE_SIZE) || ++ !IS_ALIGNED(args->size, PAGE_SIZE) || ++ (args->flags & ~RMI_MEASURE_CONTENT)) ++ return -EINVAL; ++ ++ ipa_base = args->base; ++ ipa_end = ipa_base + args->size; ++ ++ if (ipa_end < ipa_base) ++ return -EINVAL; ++ ++ if (args->flags & RMI_MEASURE_CONTENT) ++ data_flags |= RMI_MEASURE_CONTENT; ++ ++ /* ++ * Perform the population in parts to ensure locks are not held for too ++ * long ++ */ ++ while (ipa_base < ipa_end) { ++ phys_addr_t end = min(ipa_end, ipa_base + SZ_2M); ++ ++ int ret = populate_region(kvm, ipa_base, end, ++ args->flags); ++ ++ if (ret) ++ return ret; ++ ++ ipa_base = end; ++ ++ cond_resched(); ++ } ++ ++ return 0; ++} ++ ++static int realm_set_ipa_state(struct kvm_vcpu *vcpu, ++ unsigned long start, ++ unsigned long end, ++ unsigned long ripas, ++ unsigned long *top_ipa) ++{ ++ struct kvm *kvm = vcpu->kvm; ++ struct realm *realm = &kvm->arch.realm; ++ struct realm_rec *rec = vcpu->arch.rec; ++ phys_addr_t rd_phys = virt_to_phys(realm->rd); ++ phys_addr_t rec_phys = virt_to_phys(rec->rec_page); ++ struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; ++ unsigned long 
ipa = start; ++ int ret = 0; ++ ++ while (ipa < end) { ++ unsigned long next; ++ ++ ret = rmi_rtt_set_ripas(rd_phys, rec_phys, ipa, end, &next); ++ ++ if (RMI_RETURN_STATUS(ret) == RMI_SUCCESS) { ++ ipa = next; ++ } else if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { ++ int walk_level = RMI_RETURN_INDEX(ret); ++ int level = find_map_level(realm, ipa, end); ++ ++ /* ++ * If the RMM walk ended early then more tables are ++ * needed to reach the required depth to set the RIPAS. ++ */ ++ if (walk_level < level) { ++ ret = realm_create_rtt_levels(realm, ipa, ++ walk_level, ++ level, ++ memcache); ++ /* Retry with RTTs created */ ++ if (!ret) ++ continue; ++ } else { ++ ret = -EINVAL; ++ } ++ ++ break; ++ } else { ++ WARN(1, "Unexpected error in %s: %#x\n", __func__, ++ ret); ++ ret = -ENXIO; ++ break; ++ } ++ } ++ ++ *top_ipa = ipa; ++ ++ if (ripas == RMI_EMPTY && ipa != start) ++ realm_unmap_private_range(kvm, start, ipa); ++ ++ return ret; ++} ++ ++static int realm_init_ipa_state(struct realm *realm, ++ unsigned long ipa, ++ unsigned long end) ++{ ++ phys_addr_t rd_phys = virt_to_phys(realm->rd); ++ int ret; ++ ++ while (ipa < end) { ++ unsigned long next; ++ ++ ret = rmi_rtt_init_ripas(rd_phys, ipa, end, &next); ++ ++ if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) { ++ int err_level = RMI_RETURN_INDEX(ret); ++ int level = find_map_level(realm, ipa, end); ++ ++ if (WARN_ON(err_level >= level)) ++ return -ENXIO; ++ ++ ret = realm_create_rtt_levels(realm, ipa, ++ err_level, ++ level, NULL); ++ if (ret) ++ return ret; ++ /* Retry with the RTT levels in place */ ++ continue; ++ } else if (WARN_ON(ret)) { ++ return -ENXIO; ++ } ++ ++ ipa = next; ++ } ++ ++ return 0; ++} ++ ++static int kvm_init_ipa_range_realm(struct kvm *kvm, ++ struct arm_rme_init_ripas *args) ++{ ++ gpa_t addr, end; ++ struct realm *realm = &kvm->arch.realm; ++ ++ addr = args->base; ++ end = addr + args->size; ++ ++ if (end < addr) ++ return -EINVAL; ++ ++ if (kvm_realm_state(kvm) != REALM_STATE_NEW) ++ 
return -EPERM; ++ ++ return realm_init_ipa_state(realm, addr, end); ++} ++ ++static int kvm_activate_realm(struct kvm *kvm) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ ++ if (kvm_realm_state(kvm) != REALM_STATE_NEW) ++ return -EINVAL; ++ ++ if (rmi_realm_activate(virt_to_phys(realm->rd))) ++ return -ENXIO; ++ ++ WRITE_ONCE(realm->state, REALM_STATE_ACTIVE); ++ return 0; ++} ++ ++/* Protects access to rme_vmid_bitmap */ ++static DEFINE_SPINLOCK(rme_vmid_lock); ++static unsigned long *rme_vmid_bitmap; ++ ++static int rme_vmid_init(void) ++{ ++ unsigned int vmid_count = 1 << kvm_get_vmid_bits(); ++ ++ rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL); ++ if (!rme_vmid_bitmap) { ++ kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static int rme_vmid_reserve(void) ++{ ++ int ret; ++ unsigned int vmid_count = 1 << kvm_get_vmid_bits(); ++ ++ spin_lock(&rme_vmid_lock); ++ ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0); ++ spin_unlock(&rme_vmid_lock); ++ ++ return ret; ++} ++ ++static void rme_vmid_release(unsigned int vmid) ++{ ++ spin_lock(&rme_vmid_lock); ++ bitmap_release_region(rme_vmid_bitmap, vmid, 0); ++ spin_unlock(&rme_vmid_lock); ++} ++ ++static int kvm_create_realm(struct kvm *kvm) ++{ ++ struct realm *realm = &kvm->arch.realm; ++ int ret; ++ ++ if (!kvm_is_realm(kvm)) ++ return -EINVAL; ++ if (kvm_realm_is_created(kvm)) ++ return -EEXIST; ++ ++ ret = rme_vmid_reserve(); ++ if (ret < 0) ++ return ret; ++ realm->vmid = ret; ++ ++ ret = realm_create_rd(kvm); ++ if (ret) { ++ rme_vmid_release(realm->vmid); ++ return ret; ++ } ++ ++ WRITE_ONCE(realm->state, REALM_STATE_NEW); ++ ++ /* The realm is up, free the parameters. 
*/ ++ free_page((unsigned long)realm->params); ++ realm->params = NULL; ++ ++ return 0; ++} ++ ++static int config_realm_hash_algo(struct realm *realm, ++ struct arm_rme_config *cfg) ++{ ++ switch (cfg->hash_algo) { ++ case ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256: ++ if (!rme_has_feature(RMI_FEATURE_REGISTER_0_HASH_SHA_256)) ++ return -EINVAL; ++ break; ++ case ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512: ++ if (!rme_has_feature(RMI_FEATURE_REGISTER_0_HASH_SHA_512)) ++ return -EINVAL; ++ break; ++ default: ++ return -EINVAL; ++ } ++ realm->params->hash_algo = cfg->hash_algo; ++ return 0; ++} ++ ++static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap) ++{ ++ struct arm_rme_config cfg; ++ struct realm *realm = &kvm->arch.realm; ++ int r = 0; ++ ++ if (kvm_realm_is_created(kvm)) ++ return -EBUSY; ++ ++ if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg))) ++ return -EFAULT; ++ ++ switch (cfg.cfg) { ++ case ARM_RME_CONFIG_RPV: ++ memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv)); ++ break; ++ case ARM_RME_CONFIG_HASH_ALGO: ++ r = config_realm_hash_algo(realm, &cfg); ++ break; ++ default: ++ r = -EINVAL; ++ } ++ ++ return r; ++} ++ ++int _kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) ++{ ++ int r = 0; ++ ++ if (!kvm_is_realm(kvm)) ++ return -EINVAL; ++ ++ switch (cap->args[0]) { ++ case KVM_CAP_ARM_RME_CONFIG_REALM: ++ r = kvm_rme_config_realm(kvm, cap); ++ break; ++ case KVM_CAP_ARM_RME_CREATE_REALM: ++ r = kvm_create_realm(kvm); ++ break; ++ case KVM_CAP_ARM_RME_INIT_RIPAS_REALM: { ++ struct arm_rme_init_ripas args; ++ void __user *argp = u64_to_user_ptr(cap->args[1]); ++ ++ if (copy_from_user(&args, argp, sizeof(args))) { ++ r = -EFAULT; ++ break; ++ } ++ ++ r = kvm_init_ipa_range_realm(kvm, &args); ++ break; ++ } ++ case KVM_CAP_ARM_RME_POPULATE_REALM: { ++ struct arm_rme_populate_realm args; ++ void __user *argp = u64_to_user_ptr(cap->args[1]); ++ ++ if (copy_from_user(&args, argp, sizeof(args))) { ++ r = 
/*
 * Tear down the realm belonging to @kvm.
 *
 * Any parameter page that is still allocated is freed first. If the
 * realm was created, it is marked DYING, the RD is destroyed and freed,
 * the VMID is released and the stage-2 PGD granules are undelegated
 * before the PGD itself is freed and the realm is marked DEAD.
 *
 * If the RMM refuses to destroy the realm or to undelegate a PGD granule
 * the function returns early, leaving the realm in REALM_STATE_DYING and
 * the remaining resources unreleased.
 */
void _kvm_destroy_realm(struct kvm *kvm)
{
	struct realm *realm = &kvm->arch.realm;
	size_t pgd_size = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr);
	int i;

	/* The parameter page can still exist if the realm was never created */
	if (realm->params) {
		free_page((unsigned long)realm->params);
		realm->params = NULL;
	}

	if (!kvm_realm_is_created(kvm))
		return;

	WRITE_ONCE(realm->state, REALM_STATE_DYING);

	if (realm->rd) {
		phys_addr_t rd_phys = virt_to_phys(realm->rd);

		/* On failure nothing further is released */
		if (WARN_ON(rmi_realm_destroy(rd_phys)))
			return;
		free_delegated_granule(rd_phys);
		realm->rd = NULL;
	}

	rme_vmid_release(realm->vmid);

	/* Hand every granule of the stage-2 PGD back before it is freed */
	for (i = 0; i < pgd_size; i += RMM_PAGE_SIZE) {
		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i;

		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
			return;
	}

	WRITE_ONCE(realm->state, REALM_STATE_DEAD);

	/* Now that the Realm is destroyed, free the entry level RTTs */
	kvm_free_stage2_pgd(&kvm->arch.mmu);
}
/*
 * Undelegate @num_aux auxiliary granules and free the host pages that
 * back them.
 *
 * @aux_pages holds one entry per host page; each page provides up to
 * PAGE_SIZE / RMM_PAGE_SIZE consecutive granules. A page is only freed
 * when every one of its granules was undelegated successfully, otherwise
 * it is leaked.
 */
static void free_rec_aux(struct page **aux_pages,
			 unsigned int num_aux)
{
	/* i counts granules, j is the byte offset inside the current page */
	unsigned int i, j;
	unsigned int page_count = 0;

	for (i = 0; i < num_aux;) {
		struct page *aux_page = aux_pages[page_count++];
		phys_addr_t aux_page_phys = page_to_phys(aux_page);
		bool should_free = true;

		for (j = 0; j < PAGE_SIZE && i < num_aux; j += RMM_PAGE_SIZE) {
			if (WARN_ON(rmi_granule_undelegate(aux_page_phys)))
				should_free = false;
			aux_page_phys += RMM_PAGE_SIZE;
			i++;
		}
		/* Only free if all the undelegate calls were successful */
		if (should_free)
			__free_page(aux_page);
	}
}
} ++ } ++ __free_page(aux_page); ++out_err: ++ free_rec_aux(aux_pages, i); ++ return ret; ++} ++ ++int _kvm_create_rec(struct kvm_vcpu *vcpu) ++{ ++ struct user_pt_regs *vcpu_regs = vcpu_gp_regs(vcpu); ++ unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); ++ struct realm *realm = &vcpu->kvm->arch.realm; ++ struct realm_rec *rec = vcpu->arch.rec; ++ unsigned long rec_page_phys; ++ struct rec_params *params; ++ int r, i; ++ ++ if (kvm_realm_state(vcpu->kvm) != REALM_STATE_NEW) ++ return -ENOENT; ++ ++ if (rec->run) ++ return -EBUSY; ++ ++ /* ++ * The RMM will report PSCI v1.0 to Realms and the KVM_ARM_VCPU_PSCI_0_2 ++ * flag covers v0.2 and onwards. ++ */ ++ if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_PSCI_0_2)) ++ return -EINVAL; ++ ++ if (vcpu->kvm->arch.arm_pmu && !kvm_vcpu_has_pmu(vcpu)) ++ return -EINVAL; ++ ++ BUILD_BUG_ON(sizeof(*params) > PAGE_SIZE); ++ BUILD_BUG_ON(sizeof(*rec->run) > PAGE_SIZE); ++ ++ params = (struct rec_params *)get_zeroed_page(GFP_KERNEL); ++ rec->rec_page = (void *)__get_free_page(GFP_KERNEL); ++ rec->run = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!params || !rec->rec_page || !rec->run) { ++ r = -ENOMEM; ++ goto out_free_pages; ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(params->gprs); i++) ++ params->gprs[i] = vcpu_regs->regs[i]; ++ ++ params->pc = vcpu_regs->pc; ++ ++ if (vcpu->vcpu_id == 0) ++ params->flags |= REC_PARAMS_FLAG_RUNNABLE; ++ ++ rec_page_phys = virt_to_phys(rec->rec_page); ++ ++ if (rmi_granule_delegate(rec_page_phys)) { ++ r = -ENXIO; ++ goto out_free_pages; ++ } ++ ++ r = alloc_rec_aux(rec->aux_pages, params->aux, realm->num_aux); ++ if (r) ++ goto out_undelegate_rmm_rec; ++ ++ params->num_rec_aux = realm->num_aux; ++ params->mpidr = mpidr; ++ ++ if (rmi_rec_create(virt_to_phys(realm->rd), ++ rec_page_phys, ++ virt_to_phys(params))) { ++ r = -ENXIO; ++ goto out_free_rec_aux; ++ } ++ ++ rec->mpidr = mpidr; ++ ++ free_page((unsigned long)params); ++ return 0; ++ ++out_free_rec_aux: ++ free_rec_aux(rec->aux_pages, 
realm->num_aux); ++out_undelegate_rmm_rec: ++ if (WARN_ON(rmi_granule_undelegate(rec_page_phys))) ++ rec->rec_page = NULL; ++out_free_pages: ++ free_page((unsigned long)rec->run); ++ free_page((unsigned long)rec->rec_page); ++ free_page((unsigned long)params); ++ return r; ++} ++ ++void _kvm_destroy_rec(struct kvm_vcpu *vcpu) ++{ ++ struct realm *realm = &vcpu->kvm->arch.realm; ++ struct realm_rec *rec = vcpu->arch.rec; ++ unsigned long rec_page_phys; ++ ++ if (!vcpu_is_rec(vcpu)) ++ return; ++ ++ if (!rec->run) { ++ /* Nothing to do if the VCPU hasn't been finalized */ ++ return; ++ } ++ ++ free_page((unsigned long)rec->run); ++ ++ rec_page_phys = virt_to_phys(rec->rec_page); ++ ++ /* ++ * The REC and any AUX pages cannot be reclaimed until the REC is ++ * destroyed. So if the REC destroy fails then the REC page and any AUX ++ * pages will be leaked. ++ */ ++ if (WARN_ON(rmi_rec_destroy(rec_page_phys))) ++ return; ++ ++ free_rec_aux(rec->aux_pages, realm->num_aux); ++ ++ free_delegated_granule(rec_page_phys); ++} ++ ++int _kvm_init_realm_vm(struct kvm *kvm) ++{ ++ kvm->arch.realm.params = (void *)get_zeroed_page(GFP_KERNEL); ++ ++ if (!kvm->arch.realm.params) ++ return -ENOMEM; ++ return 0; ++} ++ ++void _kvm_init_rme(void) ++{ ++ if (PAGE_SIZE != SZ_4K) ++ /* Only 4k page size on the host is supported */ ++ return; ++ ++ if (rmi_check_version()) ++ /* Continue without realm support */ ++ return; ++ ++ if (WARN_ON(rmi_features(0, &rmm_feat_reg0))) ++ return; ++ ++ if (rme_vmid_init()) ++ return; ++ ++ static_branch_enable(&kvm_rme_is_available); ++} ++ ++static struct cca_operations armcca_operations = { ++ .enable_cap = _kvm_realm_enable_cap, ++ .init_realm_vm = _kvm_init_realm_vm, ++ .realm_vm_enter = _kvm_rec_enter, ++ .realm_vm_exit = _handle_rec_exit, ++ .init_sel2_hypervisor = _kvm_init_rme, ++ .psci_complete = _realm_psci_complete, ++ .destroy_vm = _kvm_destroy_realm, ++ .create_vcpu = _kvm_create_rec, ++ .destroy_vcpu = _kvm_destroy_rec, ++ .vgic_nr_lr = 
_kvm_realm_vgic_nr_lr, ++}; ++ ++static int __init armcca_register(void) ++{ ++ return cca_operations_register(ARMCCA_CVM, &armcca_operations); ++} ++core_initcall(armcca_register); +diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c +index 8cd4c95759a0..ff00ad782d7d 100644 +--- a/arch/arm64/kvm/sys_regs.c ++++ b/arch/arm64/kvm/sys_regs.c +@@ -743,13 +743,9 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, + + static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) + { +- u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX); ++ u64 mask = BIT(ARMV8_PMU_CYCLE_IDX); ++ u8 n = vcpu->kvm->arch.pmcr_n; + +- /* No PMU available, any PMU reg may UNDEF... */ +- if (!kvm_arm_support_pmu_v3()) +- return 0; +- +- n = FIELD_GET(ARMV8_PMU_PMCR_N, read_sysreg(pmcr_el0)); + if (n) + mask |= GENMASK(n - 1, 0); + +@@ -789,17 +785,14 @@ static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) + + static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) + { +- u64 pmcr; ++ u64 pmcr = 0; + +- /* No PMU available, PMCR_EL0 may UNDEF... */ +- if (!kvm_arm_support_pmu_v3()) +- return 0; +- +- /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ +- pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N; + if (!kvm_supports_32bit_el0()) + pmcr |= ARMV8_PMU_PMCR_LC; +- ++ /* ++ * The value of PMCR.N field is included when the ++ * vCPU register is read via kvm_vcpu_read_pmcr(). 
++ */ + __vcpu_sys_reg(vcpu, r->reg) = pmcr; + + return __vcpu_sys_reg(vcpu, r->reg); +@@ -1135,6 +1128,51 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + { SYS_DESC(SYS_BRBSRC_EL1(n)), undef_access }, \ + { SYS_DESC(SYS_BRBTGT_EL1(n)), undef_access } \ + ++static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, ++ u64 *val) ++{ ++ *val = kvm_vcpu_read_pmcr(vcpu); ++ return 0; ++} ++ ++static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, ++ u64 val) ++{ ++ u8 new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val); ++ struct kvm *kvm = vcpu->kvm; ++ ++ mutex_lock(&kvm->arch.config_lock); ++ ++ /* ++ * The vCPU can't have more counters than the PMU hardware ++ * implements. Ignore this error to maintain compatibility ++ * with the existing KVM behavior. ++ */ ++ if (!kvm_vm_has_ran_once(kvm) && !kvm_realm_is_created(kvm) && ++ new_n <= kvm_arm_pmu_get_max_counters(kvm)) ++ kvm->arch.pmcr_n = new_n; ++ ++ mutex_unlock(&kvm->arch.config_lock); ++ ++ /* ++ * Ignore writes to RES0 bits, read only bits that are cleared on ++ * vCPU reset, and writable bits that KVM doesn't support yet. ++ * (i.e. only PMCR.N and bits [7:0] are mutable from userspace) ++ * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU. ++ * But, we leave the bit as it is here, as the vCPU's PMUver might ++ * be changed later (NOTE: the bit will be cleared on first vCPU run ++ * if necessary). 
++ */ ++ val &= ARMV8_PMU_PMCR_MASK; ++ ++ /* The LC bit is RES1 when AArch32 is not supported */ ++ if (!kvm_supports_32bit_el0()) ++ val |= ARMV8_PMU_PMCR_LC; ++ ++ __vcpu_sys_reg(vcpu, r->reg) = val; ++ return 0; ++} ++ + /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ + #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ + { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ +@@ -1526,14 +1564,18 @@ static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + /* Hide BRBE from guests */ + val &= ~ID_AA64DFR0_EL1_BRBE_MASK; + +- return val; ++ return kvm_realm_reset_id_aa64dfr0_el1(vcpu, val); + } + + static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) + { ++ u8 debugver = SYS_FIELD_GET(ID_AA64DFR0_EL1, DebugVer, val); + u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val); ++ u8 bps = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, val); ++ u8 wps = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, val); ++ u8 ctx_cmps = SYS_FIELD_GET(ID_AA64DFR0_EL1, CTX_CMPs, val); + + /* + * Prior to commit 3d0dba5764b9 ("KVM: arm64: PMU: Move the +@@ -1552,6 +1594,15 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + val &= ~ID_AA64DFR0_EL1_PMUVer_MASK; + ++ /* ++ * ID_AA64DFR0_EL1.DebugVer, BRPs and WRPs all have to be greater than ++ * zero. CTX_CMPs is never greater than BRPs. ++ */ ++ if ((kvm_get_cvm_type() != VIRTCCA_CVM) && ++ (debugver < ID_AA64DFR0_EL1_DebugVer_IMP || !bps || !wps || ++ ctx_cmps > bps)) ++ return -EINVAL; ++ + return set_id_reg(vcpu, rd, val); + } + +@@ -1667,10 +1718,11 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + mutex_lock(&vcpu->kvm->arch.config_lock); + + /* +- * Once the VM has started the ID registers are immutable. Reject any +- * write that does not match the final register value. ++ * Once the VM has started or the Realm descriptor is created, the ID ++ * registers are immutable. 
Reject any write that does not match the ++ * final register value. + */ +- if (kvm_vm_has_ran_once(vcpu->kvm)) { ++ if (kvm_vm_has_ran_once(vcpu->kvm) || kvm_realm_is_created(vcpu->kvm)) { + if (val != read_id_reg(vcpu, rd)) + ret = -EBUSY; + else +@@ -2308,8 +2360,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { + { SYS_DESC(SYS_CTR_EL0), access_ctr }, + { SYS_DESC(SYS_SVCR), undef_access }, + +- { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, +- .reset = reset_pmcr, .reg = PMCR_EL0 }, ++ { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, ++ .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr }, + { PMU_SYS_REG(PMCNTENSET_EL0), + .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, + { PMU_SYS_REG(PMCNTENCLR_EL0), +@@ -3614,18 +3666,18 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg + sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + } + +-static unsigned int num_demux_regs(void) ++static unsigned int num_demux_regs(struct kvm_vcpu *vcpu) + { +- return CSSELR_MAX; ++ return kvm_is_realm(vcpu->kvm) ? 0 : CSSELR_MAX; + } + +-static int write_demux_regids(u64 __user *uindices) ++static int write_demux_regids(struct kvm_vcpu *vcpu, u64 __user *uindices) + { + u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX; + unsigned int i; + + val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; +- for (i = 0; i < CSSELR_MAX; i++) { ++ for (i = 0; i < num_demux_regs(vcpu); i++) { + if (put_user(val | i, uindices)) + return -EFAULT; + uindices++; +@@ -3633,6 +3685,24 @@ static int write_demux_regids(u64 __user *uindices) + return 0; + } + ++static unsigned int num_invariant_regs(struct kvm_vcpu *vcpu) ++{ ++ return kvm_is_realm(vcpu->kvm) ? 
0 : ARRAY_SIZE(invariant_sys_regs); ++} ++ ++static int write_invariant_regids(struct kvm_vcpu *vcpu, u64 __user *uindices) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < num_invariant_regs(vcpu); i++) { ++ if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) ++ return -EFAULT; ++ uindices++; ++ } ++ return 0; ++} ++ ++ + static u64 sys_reg_to_index(const struct sys_reg_desc *reg) + { + return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | +@@ -3656,11 +3726,27 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) + return true; + } + ++static bool kvm_realm_sys_reg_hidden_user(const struct kvm_vcpu *vcpu, u64 reg) ++{ ++ if (!kvm_is_realm(vcpu->kvm)) ++ return false; ++ ++ switch (reg) { ++ case SYS_ID_AA64DFR0_EL1: ++ case SYS_PMCR_EL0: ++ return false; ++ } ++ return true; ++} ++ + static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 __user **uind, + unsigned int *total) + { ++ if (kvm_realm_sys_reg_hidden_user(vcpu, reg_to_encoding(rd))) ++ return 0; ++ + /* + * Ignore registers we trap but don't save, + * and for which no custom user accessor is provided. +@@ -3698,29 +3784,26 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) + + unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu) + { +- return ARRAY_SIZE(invariant_sys_regs) +- + num_demux_regs() ++ return num_invariant_regs(vcpu) ++ + num_demux_regs(vcpu) + + walk_sys_regs(vcpu, (u64 __user *)NULL); + } + + int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) + { +- unsigned int i; + int err; + +- /* Then give them all the invariant registers' indices. 
*/ +- for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) { +- if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices)) +- return -EFAULT; +- uindices++; +- } ++ err = write_invariant_regids(vcpu, uindices); ++ if (err) ++ return err; ++ uindices += num_invariant_regs(vcpu); + + err = walk_sys_regs(vcpu, uindices); + if (err < 0) + return err; + uindices += err; + +- return write_demux_regids(uindices); ++ return write_demux_regids(vcpu, uindices); + } + + int __init kvm_sys_reg_table_init(void) +diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c +index 9ec452bcfb13..14c4ce075232 100644 +--- a/arch/arm64/kvm/vgic/vgic-init.c ++++ b/arch/arm64/kvm/vgic/vgic-init.c +@@ -103,7 +103,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) + * the proper checks already. + */ + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && +- !kvm_vgic_global_state.can_emulate_gicv2) ++ (!kvm_vgic_global_state.can_emulate_gicv2 || kvm_is_realm(kvm))) + return -ENODEV; + + /* Must be held to avoid race with vCPU creation */ +diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c +index dab599e857b5..ffed412e10a6 100644 +--- a/arch/arm64/kvm/vgic/vgic-v3.c ++++ b/arch/arm64/kvm/vgic/vgic-v3.c +@@ -7,9 +7,11 @@ + #include + #include + #include ++#include + #include + #include + #include ++#include + #include + + #include "vgic.h" +@@ -681,10 +683,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) + (unsigned long long)info->vcpu.start); + } else if (kvm_get_mode() != KVM_MODE_PROTECTED) { + kvm_vgic_global_state.vcpu_base = info->vcpu.start; +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (!static_branch_unlikely(&virtcca_cvm_is_available)) +-#endif +- kvm_vgic_global_state.can_emulate_gicv2 = true; ++ kvm_vgic_global_state.can_emulate_gicv2 = true; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); +@@ -764,13 +763,14 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) + void 
vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) + { + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; ++ + #ifdef CONFIG_HISI_VIRTCCA_HOST + if (vcpu_is_tec(vcpu)) { +- cpu_if->vgic_vmcr = +- ((struct tmi_tec_run *)vcpu->arch.tec.tec_run)->tec_exit.gicv3_vmcr; ++ cpu_if->vgic_vmcr = vcpu->arch.tec.run->exit.gicv3_vmcr; + return; + } + #endif ++ + if (likely(cpu_if->vgic_sre)) + cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); + } +@@ -779,6 +779,9 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) + { + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + ++ ++ if (_vcpu_is_rec(vcpu)) ++ cpu_if->vgic_vmcr = vcpu->arch.rec->run->exit.gicv3_vmcr; + WARN_ON(vgic_v4_put(vcpu)); + + vgic_v3_vmcr_sync(vcpu); +diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c +index ec110006acf5..13af2566f56c 100644 +--- a/arch/arm64/kvm/vgic/vgic.c ++++ b/arch/arm64/kvm/vgic/vgic.c +@@ -10,7 +10,9 @@ + #include + #include + ++#include + #include ++#include + #include + + #include "vgic.h" +@@ -22,6 +24,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { + .gicv3_cpuif = STATIC_KEY_FALSE_INIT, + }; + ++static inline int kvm_vcpu_vgic_nr_lr(struct kvm_vcpu *vcpu) ++{ ++ if (unlikely(vcpu_is_rec(vcpu))) ++ return kvm_realm_vgic_nr_lr(); ++ return kvm_vgic_global_state.nr_lr; ++} ++ + /* + * Locking order is always: + * kvm->lock (mutex) +@@ -841,7 +850,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + count = compute_ap_list_depth(vcpu, &multi_sgi); +- if (count > kvm_vgic_global_state.nr_lr || multi_sgi) ++ if (count > kvm_vcpu_vgic_nr_lr(vcpu) || multi_sgi) + vgic_sort_ap_list(vcpu); + + count = 0; +@@ -870,7 +879,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) + + raw_spin_unlock(&irq->irq_lock); + +- if (count == kvm_vgic_global_state.nr_lr) { ++ if (count == kvm_vcpu_vgic_nr_lr(vcpu)) { + if (!list_is_last(&irq->ap_list, + &vgic_cpu->ap_list_head)) + 
vgic_set_underflow(vcpu); +@@ -879,7 +888,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) + } + + /* Nuke remaining LRs */ +- for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) ++ for (i = count ; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) + vgic_clear_lr(vcpu, i); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) +@@ -903,11 +912,11 @@ static inline void vgic_tmm_save_state(struct kvm_vcpu *vcpu) + { + int i; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; +- struct tmi_tec_run *tec_run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *tec_run = vcpu->arch.tec.run; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { +- cpu_if->vgic_lr[i] = tec_run->tec_exit.gicv3_lrs[i]; +- tec_run->tec_entry.gicv3_lrs[i] = 0; ++ cpu_if->vgic_lr[i] = tec_run->exit.gicv3_lrs[i]; ++ tec_run->enter.gicv3_lrs[i] = 0; + } + } + +@@ -915,27 +924,41 @@ static inline void vgic_tmm_restore_state(struct kvm_vcpu *vcpu) + { + int i; + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; +- struct tmi_tec_run *tec_run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *tec_run = vcpu->arch.tec.run; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; ++i) { +- tec_run->tec_entry.gicv3_lrs[i] = cpu_if->vgic_lr[i]; +- tec_run->tec_exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; ++ tec_run->enter.gicv3_lrs[i] = cpu_if->vgic_lr[i]; ++ tec_run->exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; + } + } + #endif + ++static inline void vgic_rmm_save_state(struct kvm_vcpu *vcpu) ++{ ++ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; ++ int i; ++ ++ if (!_vcpu_is_rec(vcpu)) ++ return; ++ ++ for (i = 0; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) { ++ cpu_if->vgic_lr[i] = vcpu->arch.rec->run->exit.gicv3_lrs[i]; ++ vcpu->arch.rec->run->enter.gicv3_lrs[i] = 0; ++ } ++} ++ + static inline void vgic_save_state(struct kvm_vcpu *vcpu) + { + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); +- else + #ifdef CONFIG_HISI_VIRTCCA_HOST 
+- if (vcpu_is_tec(vcpu)) +- vgic_tmm_save_state(vcpu); +- else ++ else if (vcpu_is_tec(vcpu)) ++ vgic_tmm_save_state(vcpu); + #endif +- __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); +- ++ else if (vcpu_is_rec(vcpu)) ++ vgic_rmm_save_state(vcpu); ++ else ++ __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + } + + /* Sync back the hardware VGIC state into our emulation after a guest's run. */ +@@ -960,17 +983,37 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) + vgic_prune_ap_list(vcpu); + } + ++static inline void vgic_rmm_restore_state(struct kvm_vcpu *vcpu) ++{ ++ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; ++ int i; ++ ++ if (!_vcpu_is_rec(vcpu)) ++ return; ++ ++ for (i = 0; i < kvm_vcpu_vgic_nr_lr(vcpu); i++) { ++ vcpu->arch.rec->run->enter.gicv3_lrs[i] = cpu_if->vgic_lr[i]; ++ /* ++ * Also populate the rec.run->exit copies so that a late ++ * decision to back out from entering the realm doesn't cause ++ * the state to be lost ++ */ ++ vcpu->arch.rec->run->exit.gicv3_lrs[i] = cpu_if->vgic_lr[i]; ++ } ++} ++ + static inline void vgic_restore_state(struct kvm_vcpu *vcpu) + { + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); +- else + #ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) +- vgic_tmm_restore_state(vcpu); +- else ++ else if (vcpu_is_tec(vcpu)) ++ vgic_tmm_restore_state(vcpu); + #endif +- __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); ++ else if (vcpu_is_rec(vcpu)) ++ vgic_rmm_restore_state(vcpu); ++ else ++ __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + } + + /* Flush our emulation state into the GIC hardware before entering the guest. 
*/ +@@ -1009,13 +1052,15 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) + + void kvm_vgic_load(struct kvm_vcpu *vcpu) + { +- if (unlikely(!vgic_initialized(vcpu->kvm))) +- return; +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) ++ if (unlikely(!irqchip_in_kernel(vcpu->kvm) || ++ !vgic_initialized(vcpu->kvm) || ++ vcpu_is_rec(vcpu))) { ++ if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) ++ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + return; +-#endif +- if (kvm_vgic_global_state.type == VGIC_V2) ++ } ++ ++ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_load(vcpu); + else + vgic_v3_load(vcpu); +@@ -1023,13 +1068,15 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) + + void kvm_vgic_put(struct kvm_vcpu *vcpu) + { +- if (unlikely(!vgic_initialized(vcpu->kvm))) +- return; +-#ifdef CONFIG_HISI_VIRTCCA_HOST +- if (vcpu_is_tec(vcpu)) ++ if (unlikely(!irqchip_in_kernel(vcpu->kvm) || ++ !vgic_initialized(vcpu->kvm) || ++ vcpu_is_rec(vcpu))) { ++ if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) ++ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + return; +-#endif +- if (kvm_vgic_global_state.type == VGIC_V2) ++ } ++ ++ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_put(vcpu); + else + vgic_v3_put(vcpu); +diff --git a/arch/arm64/kvm/virtcca_cvm.c b/arch/arm64/kvm/virtcca_cvm.c +index 6c470c2fa27a..3df47586e7bd 100644 +--- a/arch/arm64/kvm/virtcca_cvm.c ++++ b/arch/arm64/kvm/virtcca_cvm.c +@@ -22,7 +22,7 @@ + /* Protects access to cvm_vmid_bitmap */ + static DEFINE_SPINLOCK(cvm_vmid_lock); + static unsigned long *cvm_vmid_bitmap; +-DEFINE_STATIC_KEY_FALSE(virtcca_cvm_is_available); ++DECLARE_STATIC_KEY_FALSE(virtcca_cvm_is_enable); + #define SIMD_PAGE_SIZE 0x3000 + #define UEFI_MAX_SIZE 0x8000000 + #define UEFI_DTB_START 0x40000000 +@@ -30,13 +30,13 @@ DEFINE_STATIC_KEY_FALSE(virtcca_cvm_is_available); + + bool 
is_virtcca_available(void) + { +- return static_key_enabled(&virtcca_cvm_is_available); ++ return static_key_enabled(&virtcca_cvm_is_enable); + } + EXPORT_SYMBOL_GPL(is_virtcca_available); + + int kvm_enable_virtcca_cvm(struct kvm *kvm) + { +- if (!static_key_enabled(&virtcca_cvm_is_available)) ++ if (!static_key_enabled(&virtcca_cvm_is_enable)) + return -EFAULT; + + kvm->arch.is_virtcca_cvm = true; +@@ -144,7 +144,7 @@ int kvm_arm_create_cvm(struct kvm *kvm) + /* get affine host numa set by default vcpu 0 */ + u64 numa_set = kvm_get_host_numa_set_by_vcpu(0, kvm); + +- if (!kvm_is_virtcca_cvm(kvm) || virtcca_cvm_state(kvm) != CVM_STATE_NONE) ++ if (!kvm_is_realm(kvm) || virtcca_cvm_state(kvm) != CVM_STATE_NONE) + return 0; + + if (!cvm->params) { +@@ -404,8 +404,8 @@ int kvm_finalize_vcpu_tec(struct kvm_vcpu *vcpu) + struct virtcca_cvm_tec *tec = &vcpu->arch.tec; + + mutex_lock(&vcpu->kvm->lock); +- tec->tec_run = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); +- if (!tec->tec_run) { ++ tec->run = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); ++ if (!tec->run) { + ret = -ENOMEM; + goto tec_free; + } +@@ -434,7 +434,7 @@ int kvm_finalize_vcpu_tec(struct kvm_vcpu *vcpu) + return ret; + + tec_free: +- kfree(tec->tec_run); ++ kfree(tec->run); + kfree(params_ptr); + mutex_unlock(&vcpu->kvm->lock); + return ret; +@@ -693,7 +693,6 @@ int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) + { + int r = 0; + +- mutex_lock(&kvm->lock); + switch (cap->args[0]) { + case KVM_CAP_ARM_TMM_CONFIG_CVM_HOST: + r = kvm_tmm_config_cvm(kvm, cap); +@@ -719,7 +718,6 @@ int kvm_cvm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) + r = -EINVAL; + break; + } +- mutex_unlock(&kvm->lock); + + return r; + } +@@ -728,14 +726,14 @@ void kvm_destroy_tec(struct kvm_vcpu *vcpu) + { + struct virtcca_cvm_tec *tec = &vcpu->arch.tec; + +- if (!vcpu_is_tec(vcpu)) ++ if (!vcpu_is_rec(vcpu)) + return; + + if (tmi_tec_destroy(tec->tec) != 0) + kvm_err("%s vcpu id : %d failed!\n", __func__, 
vcpu->vcpu_id); + + tec->tec = 0; +- kfree(tec->tec_run); ++ kfree(tec->run); + } + + static int tmi_check_version(void) +@@ -767,25 +765,25 @@ int kvm_tec_enter(struct kvm_vcpu *vcpu) + struct virtcca_cvm_tec *tec = &vcpu->arch.tec; + struct virtcca_cvm *cvm = vcpu->kvm->arch.virtcca_cvm; + ++ run = (struct tmi_tec_run *)tec->run; + if (READ_ONCE(cvm->state) != CVM_STATE_ACTIVE) + return -EINVAL; + +- run = tec->tec_run; + /* set/clear TWI TWE flags */ + if (vcpu->arch.hcr_el2 & HCR_TWI) +- run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFI; ++ run->enter.flags |= TEC_ENTRY_FLAG_TRAP_WFI; + else +- run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFI; ++ run->enter.flags &= ~TEC_ENTRY_FLAG_TRAP_WFI; + + if (vcpu->arch.hcr_el2 & HCR_TWE) +- run->tec_entry.flags |= TEC_ENTRY_FLAG_TRAP_WFE; ++ run->enter.flags |= TEC_ENTRY_FLAG_TRAP_WFE; + else +- run->tec_entry.flags &= ~TEC_ENTRY_FLAG_TRAP_WFE; ++ run->enter.flags &= ~TEC_ENTRY_FLAG_TRAP_WFE; + + return tmi_tec_enter(tec->tec, __pa(run)); + } + +-int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target) ++int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target, unsigned long status) + { + int ret; + struct virtcca_cvm_tec *calling_tec = &calling->arch.tec; +@@ -797,29 +795,30 @@ int cvm_psci_complete(struct kvm_vcpu *calling, struct kvm_vcpu *target) + return 0; + } + +-int kvm_init_tmm(void) ++void kvm_init_tmm(void) + { + int ret; + + if (PAGE_SIZE != SZ_4K) +- return 0; ++ return; + + if (tmi_check_version()) +- return 0; ++ return; + + if (tmi_kae_init()) + pr_warn("kvm [%i]: Warning: kae init failed!\n", task_pid_nr(current)); + + ret = cvm_vmid_init(); + if (ret) +- return ret; ++ return; + + tmm_feat_reg0 = tmi_features(0); + kvm_info("TMM feature0: 0x%lx\n", tmm_feat_reg0); + +- static_branch_enable(&virtcca_cvm_is_available); ++ static_branch_enable(&kvm_rme_is_available); ++ static_branch_enable(&virtcca_cvm_is_enable); + +- return 0; ++ return; + } + + u64 
virtcca_get_tmi_version(void) +@@ -863,7 +862,7 @@ int kvm_load_user_data(struct kvm *kvm, unsigned long arg) + struct virtcca_cvm *cvm = kvm->arch.virtcca_cvm; + struct kvm_numa_info *numa_info; + +- if (!kvm_is_virtcca_cvm(kvm)) ++ if (!kvm_is_realm(kvm)) + return -EFAULT; + + if (copy_from_user(&user_data, argp, sizeof(user_data))) +@@ -938,27 +937,22 @@ unsigned long cvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu, + if (!target_vcpu) + return PSCI_RET_INVALID_PARAMS; + +- cvm_psci_complete(vcpu, target_vcpu); ++ cvm_psci_complete(vcpu, target_vcpu, PSCI_RET_SUCCESS); + return PSCI_RET_SUCCESS; + } + + int kvm_cvm_vcpu_set_events(struct kvm_vcpu *vcpu, + bool serror_pending, bool ext_dabt_pending) + { +- struct virtcca_cvm_tec *tec = &vcpu->arch.tec; +- ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + if (serror_pending) + return -EINVAL; + + if (ext_dabt_pending) { +- if (!(((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags & +- TEC_ENTRY_FLAG_EMUL_MMIO)) ++ if (!(run->enter.flags & REC_ENTER_FLAG_EMULATED_MMIO)) + return -EINVAL; +- +- ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags +- &= ~TEC_ENTRY_FLAG_EMUL_MMIO; +- ((struct tmi_tec_run *)tec->tec_run)->tec_entry.flags +- |= TEC_ENTRY_FLAG_INJECT_SEA; ++ run->enter.flags &= ~REC_ENTER_FLAG_EMULATED_MMIO; ++ run->enter.flags |= REC_ENTER_FLAG_INJECT_SEA; + } + return 0; + } +@@ -988,9 +982,36 @@ int kvm_init_cvm_vm(struct kvm *kvm) + cvm->params = params; + WRITE_ONCE(cvm->state, CVM_STATE_NONE); + ++ kvm_enable_virtcca_cvm(kvm); + return 0; + } + ++extern struct vgic_global kvm_vgic_global_state; ++ ++u32 kvm_cvm_vgic_nr_lr(void) ++{ ++ return kvm_vgic_global_state.nr_lr; ++} ++ ++static struct cca_operations virtcca_operations = { ++ .enable_cap = kvm_cvm_enable_cap, ++ .init_realm_vm = kvm_init_cvm_vm, ++ .realm_vm_enter = kvm_tec_enter, ++ .realm_vm_exit = handle_cvm_exit, ++ .init_sel2_hypervisor = kvm_init_tmm, ++ .psci_complete = cvm_psci_complete, ++ .destroy_vm = kvm_destroy_cvm, ++ 
.create_vcpu = kvm_finalize_vcpu_tec, ++ .destroy_vcpu = kvm_destroy_tec, ++ .vgic_nr_lr = kvm_cvm_vgic_nr_lr, ++}; ++ ++static int __init virtcca_register(void) ++{ ++ return cca_operations_register(VIRTCCA_CVM, &virtcca_operations); ++} ++core_initcall(virtcca_register); ++ + #ifdef CONFIG_HISI_VIRTCCA_CODA + /* + * Coda (Confidential Device Assignment) feature +@@ -1250,7 +1271,7 @@ int kvm_cvm_map_ipa_mmio(struct kvm *kvm, phys_addr_t ipa_base, + int kvm_cvm_map_ipa(struct kvm *kvm, phys_addr_t ipa, kvm_pfn_t pfn, + unsigned long map_size, enum kvm_pgtable_prot prot, int ret) + { +- if (!is_virtcca_cvm_enable() || !kvm_is_virtcca_cvm(kvm)) ++ if (!is_virtcca_cvm_enable() || !kvm_is_realm(kvm)) + return ret; + + struct page *dst_page = pfn_to_page(pfn); +@@ -1301,7 +1322,7 @@ int cvm_arm_smmu_domain_set_kvm(struct device *dev, void *data) + return 1; + + kvm = virtcca_arm_smmu_get_kvm(arm_smmu_domain); +- if (kvm && kvm_is_virtcca_cvm(kvm)) ++ if (kvm && kvm_is_realm(kvm)) + arm_smmu_domain->kvm = kvm; + + return 1; +diff --git a/arch/arm64/kvm/virtcca_cvm_exit.c b/arch/arm64/kvm/virtcca_cvm_exit.c +index 9654375a9c8c..be14e08e136e 100644 +--- a/arch/arm64/kvm/virtcca_cvm_exit.c ++++ b/arch/arm64/kvm/virtcca_cvm_exit.c +@@ -1,6 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0-only + /* +- * Copyright (c) 2024, The Linux Foundation. All rights reserved. ++ * Copyright (c) 2025. Huawei Technologies Co., Ltd. All rights reserved. 
+ */ + #include + #include +@@ -14,12 +14,12 @@ typedef int (*exit_handler_fn)(struct kvm_vcpu *vcpu); + + static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu, bool unmask_ctl) + { +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + +- __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = run->tec_exit.cntv_ctl; +- __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = run->tec_exit.cntv_cval; +- __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = run->tec_exit.cntp_ctl; +- __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = run->tec_exit.cntp_cval; ++ __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = run->exit.cntv_ctl; ++ __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = run->exit.cntv_cval; ++ __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = run->exit.cntp_ctl; ++ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = run->exit.cntp_cval; + + /* Because the timer mask is tainted by TMM, we don't know the + * true intent of the guest. Here, we assume mask is always +@@ -35,10 +35,10 @@ static void update_arch_timer_irq_lines(struct kvm_vcpu *vcpu, bool unmask_ctl) + + static int tec_exit_reason_notimpl(struct kvm_vcpu *vcpu) + { +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + + pr_err("[vcpu %d] Unhandled exit reason from cvm (ESR: %#llx)\n", +- vcpu->vcpu_id, run->tec_exit.esr); ++ vcpu->vcpu_id, run->exit.esr); + return -ENXIO; + } + +@@ -51,11 +51,10 @@ static int tec_exit_wfx(struct kvm_vcpu *vcpu) + { + u64 esr = kvm_vcpu_get_esr(vcpu); + +- if (esr & ESR_ELx_WFx_ISS_WFE) { ++ if (esr & ESR_ELx_WFx_ISS_WFE) + vcpu->stat.wfe_exit_stat++; +- } else { ++ else + vcpu->stat.wfi_exit_stat++; +- } + + if (esr & ESR_ELx_WFx_ISS_WFxT) { + if (esr & ESR_ELx_WFx_ISS_RV) { +@@ -88,39 +87,39 @@ static int tec_exit_wfx(struct kvm_vcpu *vcpu) + static int tec_exit_sys_reg(struct kvm_vcpu *vcpu) + { + int ret; +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int rt = 
kvm_vcpu_sys_get_rt(vcpu); + bool is_write = !(esr & 1); + + if (is_write) +- vcpu_set_reg(vcpu, rt, run->tec_exit.gprs[0]); ++ vcpu_set_reg(vcpu, rt, run->exit.gprs[0]); + + ret = kvm_handle_sys_reg(vcpu); + + if (ret >= 0 && !is_write) +- run->tec_entry.gprs[0] = vcpu_get_reg(vcpu, rt); ++ run->enter.gprs[0] = vcpu_get_reg(vcpu, rt); + + return ret; + } + + static int tec_exit_sync_dabt(struct kvm_vcpu *vcpu) + { +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + + if (kvm_vcpu_dabt_iswrite(vcpu) && kvm_vcpu_dabt_isvalid(vcpu)) { + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), +- run->tec_exit.gprs[0]); ++ run->exit.gprs[0]); + } + return kvm_handle_guest_abort(vcpu); + } + + static int tec_exit_sync_iabt(struct kvm_vcpu *vcpu) + { +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + + pr_err("[vcpu %d] Unhandled instruction abort (ESR: %#llx).\n", +- vcpu->vcpu_id, run->tec_exit.esr); ++ vcpu->vcpu_id, run->exit.esr); + + return -ENXIO; + } +@@ -136,10 +135,10 @@ static exit_handler_fn tec_exit_handlers[] = { + static int tec_exit_psci(struct kvm_vcpu *vcpu) + { + int i; +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) +- vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); ++ vcpu_set_reg(vcpu, i, run->exit.gprs[i]); + + return kvm_psci_call(vcpu); + } +@@ -147,12 +146,12 @@ static int tec_exit_psci(struct kvm_vcpu *vcpu) + static int tec_exit_host_call(struct kvm_vcpu *vcpu) + { + int ret, i; +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; ++ struct tmi_tec_run *run = vcpu->arch.tec.run; + + vcpu->stat.hvc_exit_stat++; + + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) +- vcpu_set_reg(vcpu, i, run->tec_exit.gprs[i]); ++ vcpu_set_reg(vcpu, i, run->exit.gprs[i]); + + ret = kvm_smccc_call_handler(vcpu); + +@@ -161,7 +160,7 @@ static int tec_exit_host_call(struct kvm_vcpu 
*vcpu) + ret = 1; + } + for (i = 0; i < TEC_EXIT_NR_GPRS; ++i) +- run->tec_entry.gprs[i] = vcpu_get_reg(vcpu, i); ++ run->enter.gprs[i] = vcpu_get_reg(vcpu, i); + + return ret; + } +@@ -174,8 +173,8 @@ static int tec_exit_host_call(struct kvm_vcpu *vcpu) + int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) + { + unsigned long status; +- struct tmi_tec_run *run = vcpu->arch.tec.tec_run; +- u8 esr_ec = ESR_ELx_EC(run->tec_exit.esr); ++ struct tmi_tec_run *run = vcpu->arch.tec.run; ++ u8 esr_ec = ESR_ELx_EC(run->exit.esr); + bool is_wfx; + + status = TMI_RETURN_STATUS(tec_run_ret); +@@ -194,16 +193,16 @@ int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) + if (tec_run_ret) + return -ENXIO; + +- vcpu->arch.fault.esr_el2 = run->tec_exit.esr; +- vcpu->arch.fault.far_el2 = run->tec_exit.far; +- vcpu->arch.fault.hpfar_el2 = run->tec_exit.hpfar; ++ vcpu->arch.fault.esr_el2 = run->exit.esr; ++ vcpu->arch.fault.far_el2 = run->exit.far; ++ vcpu->arch.fault.hpfar_el2 = run->exit.hpfar; + +- is_wfx = (run->tec_exit.exit_reason == TMI_EXIT_SYNC) && (esr_ec == ESR_ELx_EC_WFx); ++ is_wfx = (run->exit.exit_reason == TMI_EXIT_SYNC) && (esr_ec == ESR_ELx_EC_WFx); + update_arch_timer_irq_lines(vcpu, is_wfx); + +- run->tec_entry.flags = 0; ++ run->enter.flags = 0; + +- switch (run->tec_exit.exit_reason) { ++ switch (run->exit.exit_reason) { + case TMI_EXIT_FIQ: + case TMI_EXIT_IRQ: + return 1; +@@ -215,7 +214,7 @@ int handle_cvm_exit(struct kvm_vcpu *vcpu, int tec_run_ret) + return tec_exit_host_call(vcpu); + } + +- kvm_pr_unimpl("Unsupported exit reason : 0x%llx\n", +- run->tec_exit.exit_reason); ++ kvm_pr_unimpl("Unsupported exit reason : %llu\n", ++ run->exit.exit_reason); + return 0; + } +diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c +index 4ea07caba71c..647bbd9bad5b 100644 +--- a/arch/arm64/mm/fault.c ++++ b/arch/arm64/mm/fault.c +@@ -898,6 +898,25 @@ static int do_tag_check_fault(unsigned long far, unsigned long esr, + return 0; + } + ++static int 
do_gpf_ptw(unsigned long far, unsigned long esr, struct pt_regs *regs) ++{ ++ const struct fault_info *inf = esr_to_fault_info(esr); ++ ++ die_kernel_fault(inf->name, far, esr, regs); ++ return 0; ++} ++ ++static int do_gpf(unsigned long far, unsigned long esr, struct pt_regs *regs) ++{ ++ const struct fault_info *inf = esr_to_fault_info(esr); ++ ++ if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) ++ return 0; ++ ++ arm64_notify_die(inf->name, regs, inf->sig, inf->code, far, esr); ++ return 0; ++} ++ + static const struct fault_info fault_info[] = { + { do_bad, SIGKILL, SI_KERNEL, "ttbr address size fault" }, + { do_bad, SIGKILL, SI_KERNEL, "level 1 address size fault" }, +@@ -934,12 +953,12 @@ static const struct fault_info fault_info[] = { + { do_bad, SIGKILL, SI_KERNEL, "unknown 32" }, + { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 34" }, +- { do_bad, SIGKILL, SI_KERNEL, "unknown 35" }, +- { do_bad, SIGKILL, SI_KERNEL, "unknown 36" }, +- { do_bad, SIGKILL, SI_KERNEL, "unknown 37" }, +- { do_bad, SIGKILL, SI_KERNEL, "unknown 38" }, +- { do_bad, SIGKILL, SI_KERNEL, "unknown 39" }, +- { do_bad, SIGKILL, SI_KERNEL, "unknown 40" }, ++ { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level -1" }, ++ { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 0" }, ++ { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 1" }, ++ { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 2" }, ++ { do_gpf_ptw, SIGKILL, SI_KERNEL, "Granule Protection Fault at level 3" }, ++ { do_gpf, SIGBUS, SI_KERNEL, "Granule Protection Fault not on table walk" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 41" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 42" }, + { do_bad, SIGKILL, SI_KERNEL, "unknown 43" }, +diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c +index 57cd2d1a9a18..247b038ff4d9 100644 +--- a/drivers/perf/arm_pmu.c ++++ b/drivers/perf/arm_pmu.c 
+@@ -770,7 +770,6 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node) + return 0; + } + +-#ifdef CONFIG_HISI_VIRTCCA_HOST + void arm_pmu_set_phys_irq(bool enable) + { + int cpu = get_cpu(); +@@ -785,7 +784,6 @@ void arm_pmu_set_phys_irq(bool enable) + + put_cpu(); + } +-#endif + + #ifdef CONFIG_CPU_PM + static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd) +diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h +index 9d3f034bd885..b527172e8916 100644 +--- a/include/kvm/arm_arch_timer.h ++++ b/include/kvm/arm_arch_timer.h +@@ -129,6 +129,8 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); + int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); + int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); + ++void kvm_realm_timers_update(struct kvm_vcpu *vcpu); ++ + u64 kvm_phys_timer_read(void); + + void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu); +diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h +index 467d60c53153..295c726d11cd 100644 +--- a/include/kvm/arm_pmu.h ++++ b/include/kvm/arm_pmu.h +@@ -76,6 +76,8 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); + void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); + void kvm_vcpu_pmu_resync_el0(void); + ++#define kvm_pmu_get_irq_level(vcpu) ((vcpu)->arch.pmu.irq_level) ++ + #define kvm_vcpu_has_pmu(vcpu) \ + (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + +@@ -101,6 +103,7 @@ void kvm_vcpu_pmu_resync_el0(void); + }) + + u8 kvm_arm_pmu_get_pmuver_limit(void); ++u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm); + u64 kvm_pmu_evtyper_mask(struct kvm *kvm); + + u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu); +@@ -165,6 +168,8 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) + return 0; + } + ++#define kvm_pmu_get_irq_level(vcpu) (false) ++ + #define kvm_vcpu_has_pmu(vcpu) ({ false; }) + #define kvm_pmu_is_3p5(vcpu) ({ 
false; }) + static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {} +@@ -180,6 +185,11 @@ static inline u64 kvm_pmu_evtyper_mask(struct kvm *kvm) + } + static inline void kvm_vcpu_pmu_resync_el0(void) {} + ++static inline u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm) ++{ ++ return 0; ++} ++ + static inline u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) + { + return 0; +diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h +index 6e55b9283789..bbeb68f031be 100644 +--- a/include/kvm/arm_psci.h ++++ b/include/kvm/arm_psci.h +@@ -10,6 +10,8 @@ + #include <linux/kvm_host.h> + #include <uapi/linux/psci.h> + ++#include <asm/kvm_emulate.h> ++ + #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) + #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) + #define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index fa0542624f42..a5cebf76aaa5 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -259,11 +259,17 @@ union kvm_mmu_notifier_arg { + pte_t pte; + }; + ++enum kvm_gfn_range_filter { ++ KVM_FILTER_SHARED = BIT(0), ++ KVM_FILTER_PRIVATE = BIT(1), ++}; ++ + struct kvm_gfn_range { + struct kvm_memory_slot *slot; + gfn_t start; + gfn_t end; + union kvm_mmu_notifier_arg arg; ++ enum kvm_gfn_range_filter attr_filter; + bool may_block; + }; + bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); +@@ -545,26 +551,18 @@ static __always_inline void guest_state_exit_irqoff(void) + } + + #ifdef CONFIG_HISI_VIRTCCA_HOST +- +-#define KVM_TYPE_CVM_BIT 8 + #define CVM_MAX_HALT_POLL_NS 100000 + +-DECLARE_STATIC_KEY_FALSE(virtcca_cvm_is_available); +- + static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) + { +- if (static_branch_unlikely(&virtcca_cvm_is_available)) +- return vcpu->arch.tec.tec_run; +- +- return false; ++ return (vcpu->arch.tec.run != NULL); + } +- +-static inline bool kvm_arm_cvm_type(unsigned long type) ++#else ++static __always_inline bool vcpu_is_tec(struct kvm_vcpu *vcpu) + { +- return type & (1UL <<
KVM_TYPE_CVM_BIT); ++ return false; + } +- +-#endif ++#endif /* CONFIG_HISI_VIRTCCA_HOST */ + + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) + { +diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h +index ac84689cc11c..b4964e1f66c4 100644 +--- a/include/linux/perf/arm_pmu.h ++++ b/include/linux/perf/arm_pmu.h +@@ -209,6 +209,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu); + #endif + + bool arm_pmu_irq_is_nmi(void); ++void arm_pmu_set_phys_irq(bool enable); + + /* Internal functions only for core arm_pmu code */ + struct arm_pmu *armpmu_alloc(void); +@@ -219,13 +220,11 @@ void armpmu_free_irq(int irq, int cpu); + + #define ARMV8_PMU_PDEV_NAME "armv8-pmu" + +-#endif /* CONFIG_ARM_PMU */ ++#else /* CONFIG_ARM_PMU */ + +-#if defined(CONFIG_ARM_PMU) && defined(CONFIG_HISI_VIRTCCA_HOST) +-void arm_pmu_set_phys_irq(bool enable); +-#else +-#define arm_pmu_set_phys_irq(x) do {} while (0) +-#endif ++static inline void arm_pmu_set_phys_irq(bool enable) {} ++ ++#endif /* CONFIG_ARM_PMU */ + + #define ARMV8_SPE_PDEV_NAME "arm,spe-v1" + #define ARMV8_TRBE_PDEV_NAME "arm,trbe" +diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h +index d55699e1386d..51da8fe01061 100644 +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -925,14 +925,25 @@ struct kvm_ppc_resize_hpt { + #define KVM_S390_SIE_PAGE_OFFSET 1 + + /* +- * On arm64, machine type can be used to request the physical +- * address size for the VM. Bits[7-0] are reserved for the guest +- * PA size shift (i.e, log2(PA_Size)). For backward compatibility, +- * value 0 implies the default IPA size, 40bits. ++ * On arm64, machine type can be used to request both the machine type and ++ * the physical address size for the VM. ++ * ++ * Bits[11-8] are reserved for the ARM specific machine type. ++ * ++ * Bits[7-0] are reserved for the guest PA size shift (i.e, log2(PA_Size)). ++ * For backward compatibility, value 0 implies the default IPA size, 40bits. 
+ */ ++#define KVM_VM_TYPE_ARM_SHIFT 8 ++#define KVM_VM_TYPE_ARM_MASK (0xfULL << KVM_VM_TYPE_ARM_SHIFT) ++#define KVM_VM_TYPE_ARM(_type) \ ++ (((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK) ++#define KVM_VM_TYPE_ARM_NORMAL KVM_VM_TYPE_ARM(0) ++#define KVM_VM_TYPE_ARM_REALM KVM_VM_TYPE_ARM(1) ++ + #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL + #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) ++ + /* + * ioctls for /dev/kvm fds: + */ +@@ -1206,6 +1217,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_COUNTER_OFFSET 227 + #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 ++#define KVM_CAP_ARM_RME 300 + + #define KVM_CAP_SEV_ES_GHCB 500 + #define KVM_CAP_HYGON_COCO_EXT 501 +@@ -2457,4 +2469,13 @@ struct kvm_csv3_handle_memory { + /* get tmi version */ + #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, u64) + ++/* Available with KVM_CAP_ARM_RME, only for VMs with KVM_VM_TYPE_ARM_REALM */ ++struct kvm_arm_rmm_psci_complete { ++ __u64 target_mpidr; ++ __u32 psci_status; ++ __u32 padding[3]; ++}; ++ ++#define KVM_ARM_VCPU_RMM_PSCI_COMPLETE _IOW(KVMIO, 0xd6, struct kvm_arm_rmm_psci_complete) ++ + #endif /* __LINUX_KVM_H */ +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 79f73ebb728d..9e351bce483e 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -633,6 +633,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, + */ + gfn_range.arg = range->arg; + gfn_range.may_block = range->may_block; ++ /* ++ * HVA-based notifications aren't relevant to private ++ * mappings as they don't have a userspace mapping. 
++ */ ++ gfn_range.attr_filter = KVM_FILTER_SHARED; + + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = +-- +2.43.0 + diff --git a/kernel.spec b/kernel.spec index a55ae1b00b679332dcfa14d18889dbc8c8f78cb8..2f7a399ae6fc5a1ff19ab231a64acd58f42c0720 100644 --- a/kernel.spec +++ b/kernel.spec @@ -42,7 +42,7 @@ rm -f test_openEuler_sign.ko test_openEuler_sign.ko.sig %global upstream_sublevel 0 %global devel_release 102 %global maintenance_release .0.0 -%global pkg_release .3 +%global pkg_release .4 %global openeuler_lts 0 %global openeuler_major 2509 @@ -122,6 +122,8 @@ Source9002: series.conf Source9998: patches.tar.bz2 %endif +Patch0001: 0001-Support-RME-feature-for-CCA-host.patch + #BuildRequires: BuildRequires: module-init-tools, patch >= 2.5.4, bash >= 2.03, tar BuildRequires: bzip2, xz, findutils, gzip, m4, perl, make >= 3.78, diffutils, gawk @@ -360,6 +362,9 @@ Applypatches() Applypatches series.conf %{_builddir}/kernel-%{version}/linux-%{KernelVer} %endif +# Arm CCA patch +%patch0001 -p1 + # riscv-kernel patch %ifarch riscv64 %endif @@ -1117,6 +1122,9 @@ fi %endif %changelog +* Fri Aug 15 2025 Hou Mingyong - 6.6.0-102.0.0.4 +- Support RME feature for CCA host + * Tue Aug 12 2025 Liu Wang <1823363429@qq.com> - 6.6.0-102.0.0.3 - Split kernel modules into kernel-extra-modules subpackage - Prioritizes core kmod (networking/drm/block/modesetting) in main kernel package