diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 7d175d3262fe0b78097b526d11660053013af932..f472fc4f697fa803dd5b196b42601e932ec1baf5 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2363,13 +2363,13 @@ static int kvm_recommended_vcpus(KVMState *s) static int kvm_max_vcpus(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); return (ret) ? ret : kvm_recommended_vcpus(s); } static int kvm_max_vcpu_id(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPU_ID); return (ret) ? ret : kvm_max_vcpus(s); } @@ -2491,10 +2491,6 @@ static int kvm_init(MachineState *ms) goto err; } - if (kvm_is_virtcca_cvm_type(type)) { - virtcca_cvm_allowed = true; - } - do { ret = kvm_ioctl(s, KVM_CREATE_VM, type); } while (ret == -EINTR); @@ -2625,7 +2621,7 @@ static int kvm_init(MachineState *ms) #ifdef KVM_CAP_SET_GUEST_DEBUG kvm_has_guest_debug = - (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); + (kvm_vm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0); #endif kvm_sstep_flags = 0; @@ -2782,7 +2778,7 @@ bool kvm_cpu_check_are_resettable(void) static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->vcpu_dirty) { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { int ret = kvm_arch_get_registers(cpu); if (ret) { error_report("Failed to get registers: %s", strerror(-ret)); @@ -2796,7 +2792,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) void kvm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->vcpu_dirty) { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2831,7 +2827,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) void kvm_cpu_synchronize_post_init(CPUState *cpu) { - run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); + if (!kvm_state->guest_state_protected) { + /* + * This runs before the machine_init_done notifiers, and is the last + * opportunity to synchronize the state of confidential guests. + */ + run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); + } } static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) @@ -4223,3 +4225,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) query_stats_schema_vcpu(first_cpu, &stats_args); } } + +void kvm_mark_guest_state_protected(void) +{ + kvm_state->guest_state_protected = true; +} diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst index 9918fac7f21216bddb4f3d450836d4c3ff87cd4f..0889ca949c160a3ae50fea5d3965bff8e01458d5 100644 --- a/docs/devel/qom.rst +++ b/docs/devel/qom.rst @@ -348,12 +348,14 @@ used. This does the same as OBJECT_DECLARE_SIMPLE_TYPE(), but without the 'struct MyDeviceClass' definition. To implement the type, the OBJECT_DEFINE macro family is available. -In the simple case the OBJECT_DEFINE_TYPE macro is suitable: +For the simplest case of a leaf class which doesn't need any of its +own virtual functions (i.e. which was declared with OBJECT_DECLARE_SIMPLE_TYPE) +the OBJECT_DEFINE_SIMPLE_TYPE macro is suitable: .. 
code-block:: c :caption: Defining a simple type - OBJECT_DEFINE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE) + OBJECT_DEFINE_SIMPLE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE) This is equivalent to the following: @@ -370,7 +372,6 @@ This is equivalent to the following: .instance_size = sizeof(MyDevice), .instance_init = my_device_init, .instance_finalize = my_device_finalize, - .class_size = sizeof(MyDeviceClass), .class_init = my_device_class_init, }; @@ -385,13 +386,36 @@ This is sufficient to get the type registered with the type system, and the three standard methods now need to be implemented along with any other logic required for the type. +If the class needs its own virtual methods, or has some other +per-class state it needs to store in its own class struct, +then you can use the OBJECT_DEFINE_TYPE macro. This does the +same thing as OBJECT_DEFINE_SIMPLE_TYPE, but it also sets the +class_size of the type to the size of the class struct. + +.. code-block:: c + :caption: Defining a type which needs a class struct + + OBJECT_DEFINE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE) + If the type needs to implement one or more interfaces, then the -OBJECT_DEFINE_TYPE_WITH_INTERFACES() macro can be used instead. -This accepts an array of interface type names. +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES() and +OBJECT_DEFINE_TYPE_WITH_INTERFACES() macros can be used instead. +These accept an array of interface type names. The difference between +them is that the former is for simple leaf classes that don't need +a class struct, and the latter is for when you will be defining +a class struct. .. code-block:: c :caption: Defining a simple type implementing interfaces + OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(MyDevice, my_device, + MY_DEVICE, DEVICE, + { TYPE_USER_CREATABLE }, + { NULL }) + +.. code-block:: c + :caption: Defining a type implementing interfaces + OBJECT_DEFINE_TYPE_WITH_INTERFACES(MyDevice, my_device, MY_DEVICE, DEVICE, { TYPE_USER_CREATABLE }, diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json index cc8f869186817ea5c33c534aa01acc314e5c235b..08c2fbabe7c9217352df99a9ebcb413e715f2b8a 100644 --- a/docs/interop/firmware.json +++ b/docs/interop/firmware.json @@ -127,6 +127,9 @@ # options related to this feature are documented in # "docs/system/i386/amd-memory-encryption.rst". # +# @arm-rme: The firmware supports running in a Realm, under the Arm Realm +# Management Extension (RME). +# # @intel-tdx: The firmware supports running under Intel Trust Domain # Extensions (TDX). # @@ -196,7 +199,7 @@ { 'enum' : 'FirmwareFeature', 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'amd-sev-snp', - 'intel-tdx', + 'arm-rme', 'intel-tdx', 'enrolled-keys', 'requires-smm', 'secure-boot', 'verbose-dynamic', 'verbose-static' ] } diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 7c4c80180c63835ad2541dac19452f72b4e8f322..0ba6d8610f6a872256385875c854f9e94a464454 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -153,10 +153,11 @@ dtb-randomness rng-seed and kaslr-seed nodes (in both "/chosen" and "/secure-chosen") to use for features like the random number generator and address space randomisation. The default is - ``on``. You will want to disable it if your trusted boot chain - will verify the DTB it is passed, since this option causes the - DTB to be non-deterministic. It would be the responsibility of - the firmware to come up with a seed and pass it on if it wants to. + ``off`` for confidential VMs, and ``on`` otherwise. 
You will want + to disable it if your trusted boot chain will verify the DTB it is + passed, since this option causes the DTB to be non-deterministic. + It would be the responsibility of the firmware to come up with a + seed and pass it on if it wants to. dtb-kaslr-seed A deprecated synonym for dtb-randomness. diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst index 0c490dbda2b72c8dc8bef57962c54220bd3dab36..acf46d88567cc884bac4e9e684e563eeb20106fc 100644 --- a/docs/system/confidential-guest-support.rst +++ b/docs/system/confidential-guest-support.rst @@ -40,5 +40,6 @@ Currently supported confidential guest mechanisms are: * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`) * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`) * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`) +* Arm Realm Management Extension (RME) Other mechanisms may be supported in future. diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 9a33601d358c1c3610c7c9a6ae974e66027a3ad4..4f5bf6e77cae22029e56cc66704a875bb9be21c7 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -527,7 +527,14 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, char **node_path; Error *err = NULL; - if (binfo->dtb_filename) { + if (binfo->dtb_filename && binfo->confidential) { + /* + * If the user is providing a DTB for a confidential VM, it is already + * tailored to this configuration and measured. Load it as is, without + * any modification. + */ + return rom_add_file_fixed_as(binfo->dtb_filename, addr, -1, as); + } else if (binfo->dtb_filename) { char *filename; filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename); if (!filename) { @@ -662,6 +669,24 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, fdt_add_psci_node(fdt); + /* Add a reserved-memory node for the event log */ + if (binfo->log_size) { + char *nodename; + + qemu_fdt_add_subnode(fdt, "/reserved-memory"); + qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#address-cells", 0x2); + qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#size-cells", 0x2); + qemu_fdt_setprop(fdt, "/reserved-memory", "ranges", NULL, 0); + + nodename = g_strdup_printf("/reserved-memory/event-log@%" PRIx64, + binfo->log_paddr); + qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_setprop_string(fdt, nodename, "compatible", "cc-event-log"); + qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", 2, binfo->log_paddr, + 2, binfo->log_size); + g_free(nodename); + } + if (binfo->modify_dtb) { binfo->modify_dtb(binfo, fdt); } @@ -759,7 +784,13 @@ void do_cpu_reset(void *opaque) if (cs == first_cpu) { AddressSpace *as = arm_boot_address_space(cpu, info); - cpu_set_pc(cs, info->loader_start); + if (info->skip_bootloader) { + assert(is_a64(env)); + env->xregs[0] = info->dtb_start; + cpu_set_pc(cs, info->entry); + } else { + cpu_set_pc(cs, info->loader_start); + } if (!have_dtb(info)) { if (old_param) { @@ -851,7 +882,8 @@ static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, } static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, - hwaddr *entry, AddressSpace *as) + hwaddr *entry, AddressSpace *as, + bool skip_bootloader) { hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR; uint64_t kernel_size = 0; @@ -903,7 +935,8 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, * bootloader, we can just load it starting at 2MB+offset rather * than 0MB + offset. 
*/ - if (kernel_load_offset < BOOTLOADER_MAX_SIZE) { + if (kernel_load_offset < BOOTLOADER_MAX_SIZE && + !skip_bootloader) { kernel_load_offset += 2 * MiB; } } @@ -926,6 +959,30 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, return kernel_size; } +static void add_event_log(struct arm_boot_info *info) +{ + if (!info->log_size) { + return; + } + + if (!info->dtb_limit) { + int dtb_size = 0; + + if (!info->get_dtb(info, &dtb_size) || dtb_size == 0) { + error_report("Board does not have a DTB"); + exit(1); + } + info->dtb_limit = info->dtb_start + dtb_size; + } + + info->log_paddr = info->dtb_limit; + if (info->log_paddr + info->log_size > + info->loader_start + info->ram_size) { + error_report("Not enough space for measurement log and DTB"); + exit(1); + } +} + static void arm_setup_direct_kernel_boot(ARMCPU *cpu, struct arm_boot_info *info) { @@ -973,6 +1030,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, } info->dtb_start = info->loader_start; info->dtb_limit = image_low_addr; + add_event_log(info); } } entry = elf_entry; @@ -987,7 +1045,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, } if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) { kernel_size = load_aarch64_image(info->kernel_filename, - info->loader_start, &entry, as); + info->loader_start, &entry, as, + info->skip_bootloader); is_linux = 1; if (kernel_size >= 0) { image_low_addr = entry; @@ -1110,6 +1169,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, error_report("Not enough space for DTB after kernel/initrd"); exit(1); } + add_event_log(info); + fixupcontext[FIXUP_ARGPTR_LO] = info->dtb_start; fixupcontext[FIXUP_ARGPTR_HI] = info->dtb_start >> 32; } else { @@ -1127,8 +1188,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, fixupcontext[FIXUP_ENTRYPOINT_LO] = entry; fixupcontext[FIXUP_ENTRYPOINT_HI] = entry >> 32; - arm_write_bootloader("bootloader", as, info->loader_start, - primary_loader, fixupcontext); + if (!info->skip_bootloader) { + arm_write_bootloader("bootloader", as, info->loader_start, + primary_loader, fixupcontext); + } if (info->write_board_setup) { info->write_board_setup(cpu, info); @@ -1194,6 +1257,8 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu, error_report("could not load firmware '%s'", firmware_filename); exit(EXIT_FAILURE); } + + add_event_log(info); } static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename) @@ -1330,6 +1395,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) } } + /* Mark all Realm memory as RAM */ + kvm_arm_rme_init_guest_ram(info->loader_start, info->ram_size); + /* Load the kernel. 
*/
     if (!info->kernel_filename || info->firmware_loaded) {
         arm_setup_firmware_boot(cpu, info, ms->firmware);
     }
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a43f18020c1d60ac1b37919cb5b6f242ef487a6a..f12bc645d2ab3fd73d20abe3515fb6edb286dcdb 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -260,6 +260,11 @@ static bool cpu_type_valid(const char *cpu)
     return false;
 }
 
+static bool virt_machine_is_confidential(VirtMachineState *vms)
+{
+    return MACHINE(vms)->cgs;
+}
+
 static void create_randomness(MachineState *ms, const char *node)
 {
     struct {
@@ -276,6 +281,7 @@ static void create_fdt(VirtMachineState *vms)
 
 static void create_fdt(VirtMachineState *vms)
 {
+    bool dtb_randomness = true;
     MachineState *ms = MACHINE(vms);
     int nb_numa_nodes = ms->numa_state->num_nodes;
     void *fdt = create_device_tree(&vms->fdt_size);
@@ -285,6 +291,16 @@ static void create_fdt(VirtMachineState *vms)
         exit(1);
     }
 
+    /*
+     * Including random data in the DTB causes a random initial measurement
+     * on CCA, so disable it for confidential VMs.
+     */
+    if (vms->dtb_randomness == ON_OFF_AUTO_OFF ||
+        (vms->dtb_randomness == ON_OFF_AUTO_AUTO &&
+         virt_machine_is_confidential(vms))) {
+        dtb_randomness = false;
+    }
+
     ms->fdt = fdt;
 
     /* Header */
@@ -301,7 +317,7 @@ static void create_fdt(VirtMachineState *vms)
         kvm_type = object_property_get_str(OBJECT(current_machine),
                                            "kvm-type", &error_abort);
     }
-    if (vms->dtb_randomness) {
+    if (dtb_randomness) {
         if (!(kvm_type && !strcmp(kvm_type, "cvm"))) {
             create_randomness(ms, "/chosen");
         }
@@ -309,7 +325,7 @@ static void create_fdt(VirtMachineState *vms)
 
     if (vms->secure) {
         qemu_fdt_add_subnode(fdt, "/secure-chosen");
-        if (vms->dtb_randomness) {
+        if (dtb_randomness) {
             create_randomness(ms, "/secure-chosen");
         }
     }
@@ -1391,6 +1407,10 @@ static PFlashCFI01 *virt_flash_create1(VirtMachineState *vms,
 
 static void virt_flash_create(VirtMachineState *vms)
 {
+    if (virt_machine_is_confidential(vms)) {
+        return;
+    }
+
     vms->flash[0] = virt_flash_create1(vms, "virt.flash0", "pflash0");
     vms->flash[1] = virt_flash_create1(vms, "virt.flash1", "pflash1");
 }
@@ -1429,6 +1449,10 @@ static void virt_flash_map(VirtMachineState *vms,
     hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
     hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
 
+    if (virt_machine_is_confidential(vms)) {
+        return;
+    }
+
     virt_flash_map1(vms->flash[0], flashbase, flashsize,
                     secure_sysmem);
     virt_flash_map1(vms->flash[1], flashbase + flashsize, flashsize,
@@ -1444,7 +1468,7 @@ static void virt_flash_fdt(VirtMachineState *vms,
     MachineState *ms = MACHINE(vms);
     char *nodename;
 
-    if (virtcca_cvm_enabled()) {
+    if (virtcca_cvm_enabled() || virt_machine_is_confidential(vms)) {
         return;
     }
 
@@ -1508,6 +1532,15 @@ static bool virt_firmware_init(VirtMachineState *vms,
     const char *bios_name;
     BlockBackend *pflash_blk0;
 
+    /*
+     * For a confidential VM, the firmware image and any boot information,
+     * including EFI variables, are stored in RAM in order to be measurable and
+     * private. Create a RAM region and load the firmware image there.
+     */
+    if (virt_machine_is_confidential(vms)) {
+        return virt_confidential_firmware_init(vms, sysmem);
+    }
+
     /* Map legacy -drive if=pflash to machine properties */
     for (i = 0; i < ARRAY_SIZE(vms->flash); i++) {
         pflash_cfi01_legacy_drive(vms->flash[i],
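virt_confidential_firmware_init() is called above but added elsewhere in this
series. A minimal sketch of what such a helper might look like, assuming it
only has to back the firmware window with RAM (the image itself is loaded by
arm_setup_confidential_firmware_boot() in hw/arm/boot.c above); the region
name and early-return policy are illustrative, not the series' actual
implementation:

    static bool virt_confidential_firmware_init(VirtMachineState *vms,
                                                MemoryRegion *sysmem)
    {
        MemoryRegion *fw_ram;

        if (!MACHINE(vms)->firmware) {
            /* No -bios image was given, nothing to map */
            return false;
        }

        /*
         * Back the firmware window with RAM so that the image can be
         * measured and kept private; arm_setup_confidential_firmware_boot()
         * loads the image itself later.
         */
        fw_ram = g_new0(MemoryRegion, 1);
        memory_region_init_ram(fw_ram, NULL, "virt.confidential-firmware",
                               vms->memmap[VIRT_FLASH].size, &error_fatal);
        memory_region_add_subregion(sysmem, vms->memmap[VIRT_FLASH].base,
                                    fw_ram);
        return true;
    }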
@@ -1956,6 +1989,11 @@ void virt_machine_done(Notifier *notifier, void *data)
         exit(1);
     }
 
+    if (vms->event_log) {
+        object_property_set_uint(vms->event_log, "load-addr",
+                                 vms->bootinfo.log_paddr, &error_fatal);
+    }
+
     fw_cfg_add_extra_pci_roots(vms->bus, vms->fw_cfg);
 
     virt_acpi_setup(vms);
@@ -2365,6 +2403,21 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
     }
 }
 
+static void create_measurement_log(VirtMachineState *vms)
+{
+    Error *err = NULL;
+
+    vms->event_log = kvm_arm_rme_get_measurement_log();
+    if (vms->event_log == NULL) {
+        return;
+    }
+    vms->bootinfo.log_size = object_property_get_uint(vms->event_log,
+                                                      "max-size", &err);
+    if (err != NULL) {
+        error_report_err(err);
+    }
+}
+
 static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot,
                                     Error **errp)
 {
@@ -2556,6 +2609,7 @@ static void machvirt_init(MachineState *machine)
     }
 
     finalize_gic_version(vms);
+    virt_flash_create(vms);
 
     possible_cpus = mc->possible_cpu_arch_ids(machine);
@@ -2610,10 +2664,12 @@
      * if the guest has EL2 then we will use SMC as the conduit,
      * and otherwise we will use HVC (for backwards compatibility and
      * because if we're using KVM then we must use HVC).
+     * Realm guests must also use SMC.
      */
     if (vms->secure && firmware_loaded) {
         vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
-    } else if (vms->virt || virtcca_cvm_enabled()) {
+    } else if (vms->virt || virtcca_cvm_enabled() ||
+               virt_machine_is_confidential(vms)) {
         vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC;
     } else {
         vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC;
@@ -2862,6 +2918,10 @@ static void machvirt_init(MachineState *machine)
                                        vms->fw_cfg, OBJECT(vms));
     }
 
+    kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus);
+
+    create_measurement_log(vms);
+
     vms->bootinfo.ram_size = machine->ram_size;
     vms->bootinfo.board_id = -1;
     vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
@@ -2872,6 +2932,8 @@ static void machvirt_init(MachineState *machine)
     vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size;
     vms->bootinfo.confidential = virtcca_cvm_enabled();
     vms->bootinfo.psci_conduit = vms->psci_conduit;
+    vms->bootinfo.confidential |= virt_machine_is_confidential(vms);
+    vms->bootinfo.skip_bootloader = virt_machine_is_confidential(vms);
     arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo);
 
     vms->machine_done.notify = virt_machine_done;
@@ -2991,18 +3053,21 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
     vms->its = value;
 }
 
-static bool virt_get_dtb_randomness(Object *obj, Error **errp)
+static void virt_get_dtb_randomness(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
+    OnOffAuto dtb_randomness = vms->dtb_randomness;
 
-    return vms->dtb_randomness;
+    visit_type_OnOffAuto(v, name, &dtb_randomness, errp);
 }
 
-static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
+static void virt_set_dtb_randomness(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
 
-    vms->dtb_randomness = value;
+    visit_type_OnOffAuto(v, name, &vms->dtb_randomness, errp);
 }
 
 static char *virt_get_oem_id(Object *obj, Error **errp)
@@ -3811,17 +3876,29 @@ static int 
virt_kvm_type(MachineState *ms, const char *type_str) if (!strcmp(kvm_type, "cvm")) { virtcca_cvm_type = VIRTCCA_CVM_TYPE; + virtcca_cvm_allowed = true; } } + int rme_vm_type = kvm_arm_rme_vm_type(ms), type; int max_vm_pa_size, requested_pa_size; + int rme_reserve_bit = 0; bool fixed_ipa; - max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); + if (rme_vm_type) { + /* + * With RME, the upper GPA bit differentiates Realm from NS memory. + * Reserve the upper bit to ensure that highmem devices will fit. + */ + rme_reserve_bit = 1; + } + + max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa) - + rme_reserve_bit; /* we freeze the memory map to compute the highest gpa */ virt_set_memmap(vms, max_vm_pa_size); - requested_pa_size = 64 - clz64(vms->highest_gpa); + requested_pa_size = 64 - clz64(vms->highest_gpa) + rme_reserve_bit; /* * KVM requires the IPA size to be at least 32 bits. @@ -3830,11 +3907,11 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) requested_pa_size = 32; } - if (requested_pa_size > max_vm_pa_size) { + if (requested_pa_size > max_vm_pa_size + rme_reserve_bit) { error_report("-m and ,maxmem option values " "require an IPA range (%d bits) larger than " "the one supported by the host (%d bits)", - requested_pa_size, max_vm_pa_size); + requested_pa_size, max_vm_pa_size + rme_reserve_bit); return -1; } /* @@ -3842,9 +3919,12 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) * the implicit legacy 40b IPA setting, in which case the kvm_type * must be 0. */ - return strcmp(type_str, "cvm") == 0 ? - ((fixed_ipa ? 0 : requested_pa_size) | virtcca_cvm_type) : - (fixed_ipa ? 0 : requested_pa_size); + type = strcmp(type_str, "cvm") == 0 ? virtcca_cvm_type : 0; + if (fixed_ipa) { + return type; + } + + return requested_pa_size | rme_vm_type | type; } static void virt_machine_class_init(ObjectClass *oc, void *data) @@ -3985,16 +4065,16 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) "Set on/off to enable/disable " "ITS instantiation"); - object_class_property_add_bool(oc, "dtb-randomness", - virt_get_dtb_randomness, - virt_set_dtb_randomness); + object_class_property_add(oc, "dtb-randomness", "OnOffAuto", + virt_get_dtb_randomness, virt_set_dtb_randomness, + NULL, NULL); object_class_property_set_description(oc, "dtb-randomness", "Set off to disable passing random or " "non-deterministic dtb nodes to guest"); - object_class_property_add_bool(oc, "dtb-kaslr-seed", - virt_get_dtb_randomness, - virt_set_dtb_randomness); + object_class_property_add(oc, "dtb-kaslr-seed", "OnOffAuto", + virt_get_dtb_randomness, virt_set_dtb_randomness, + NULL, NULL); object_class_property_set_description(oc, "dtb-kaslr-seed", "Deprecated synonym of dtb-randomness"); @@ -4081,13 +4161,8 @@ static void virt_instance_init(Object *obj) /* MTE is disabled by default. 
*/ vms->mte = false; - /* Supply kaslr-seed and rng-seed by default */ - vms->dtb_randomness = true; - vms->irqmap = a15irqmap; - virt_flash_create(vms); - vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); diff --git a/hw/core/loader.c b/hw/core/loader.c index e7a9b3775bbc9f7cca0c438c7653f28309a64853..7990147adeb89430a0ef41934cf6bd37cc6c3221 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -67,6 +67,8 @@ #include static int roms_loaded; +static NotifierList rom_loader_notifier = + NOTIFIER_LIST_INITIALIZER(rom_loader_notifier); /* return the size or -1 if error */ int64_t get_image_size(const char *filename) @@ -1209,6 +1211,11 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, return mr; } +void rom_add_load_notifier(Notifier *notifier) +{ + notifier_list_add(&rom_loader_notifier, notifier); +} + /* This function is specific for elf program because we don't need to allocate * all the rom. We just allocate the first part and the rest is just zeros. This * is why romsize and datasize are different. Also, this function takes its own @@ -1250,6 +1257,7 @@ ssize_t rom_add_option(const char *file, int32_t bootindex) static void rom_reset(void *unused) { Rom *rom; + RomLoaderNotifyData notify; QTAILQ_FOREACH(rom, &roms, next) { if (rom->fw_file) { @@ -1298,6 +1306,14 @@ static void rom_reset(void *unused) cpu_flush_icache_range(rom->addr, rom->datasize); trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom); + + notify = (RomLoaderNotifyData) { + .name = rom->name, + .blob_ptr = rom->data, + .addr = rom->addr, + .len = rom->datasize, + }; + notifier_list_notify(&rom_loader_notifier, ¬ify); } } diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig index a46663288cb8ae5ae024508ce4968805f6a6e219..70694b14a360c26e2ad75ee92d5f1f5e4d03a8e4 100644 --- a/hw/tpm/Kconfig +++ b/hw/tpm/Kconfig @@ -30,3 +30,7 @@ config TPM_SPAPR default y depends on TPM && PSERIES select TPM_BACKEND + +config TPM_LOG + bool + default y diff --git a/hw/tpm/meson.build b/hw/tpm/meson.build index 6968e60b3f7c193e57e19f1c645b811b6755d521..81efb557f3a31c7195e5a7ddfd00e67b841ae107 100644 --- a/hw/tpm/meson.build +++ b/hw/tpm/meson.build @@ -6,4 +6,5 @@ system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb.c')) system_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_ppi.c')) system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_ppi.c')) +system_ss.add(when: 'CONFIG_TPM_LOG', if_true: files('tpm_log.c')) specific_ss.add(when: 'CONFIG_TPM_SPAPR', if_true: files('tpm_spapr.c')) diff --git a/hw/tpm/tpm_log.c b/hw/tpm/tpm_log.c new file mode 100644 index 0000000000000000000000000000000000000000..ab29d8569bbb2c97f7f2a6f133e97267f4d180d2 --- /dev/null +++ b/hw/tpm/tpm_log.c @@ -0,0 +1,325 @@ +/* + * tpm_log.c - Event log as described by the Trusted Computing Group (TCG) + * + * Copyright (c) 2024 Linaro Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + * Create an event log in the format specified by: + * + * TCG PC Client Platform Firmware Profile Specification + * Level 00 Version 1.06 Revision 52 + * Family “2.0” + */ + +#include "qemu/osdep.h" + +#include "crypto/hash.h" +#include "exec/address-spaces.h" +#include "exec/memory.h" +#include "hw/tpm/tpm_log.h" +#include "qapi/error.h" +#include "qemu/bswap.h" +#include "qom/object_interfaces.h" + +/* + * Legacy structure used only in the first event in the log, for compatibility + */ +struct TcgPcClientPcrEvent { + uint32_t pcr_index; + uint32_t event_type; + uint8_t digest[20]; + uint32_t event_data_size; + uint8_t event[]; +} QEMU_PACKED; + +struct TcgEfiSpecIdEvent { + uint8_t signature[16]; + uint32_t platform_class; + uint8_t family_version_minor; + uint8_t family_version_major; + uint8_t spec_revision; + uint8_t uintn_size; + uint32_t number_of_algorithms; /* 1 */ + /* + * For now we declare a single algo, but if we want UEFI to reuse this + * header then we'd need to add entries here for all algos supported by + * UEFI (and expand the digest field for EV_NO_ACTION). + */ + uint16_t algorithm_id; + uint16_t digest_size; + uint8_t vendor_info_size; + uint8_t vendor_info[]; +} QEMU_PACKED; + +struct TcgPcrEvent2Head { + uint32_t pcr_index; + uint32_t event_type; + /* variable-sized digests */ + uint8_t digests[]; +} QEMU_PACKED; + +struct TcgPcrEvent2Tail { + uint32_t event_size; + uint8_t event[]; +} QEMU_PACKED; + +struct TpmlDigestValues { + uint32_t count; /* 1 */ + uint16_t hash_alg; + uint8_t digest[]; +} QEMU_PACKED; + +struct TpmLog { + Object parent_obj; + + TpmLogDigestAlgo digest_algo; + size_t max_size; + uint64_t load_addr; + + uint16_t tcg_algo; + GByteArray *content; + uint8_t *digest; + size_t digest_size; +}; + +OBJECT_DEFINE_SIMPLE_TYPE(TpmLog, tpm_log, TPM_LOG, OBJECT) + +static void tpm_log_init(Object *obj) +{ + TpmLog *log = TPM_LOG(obj); + + log->digest_algo = TPM_LOG_DIGEST_ALGO_SHA256; +} + +static void tpm_log_destroy(TpmLog *log) +{ + if (!log->content) { + return; + } + g_free(log->digest); + log->digest = NULL; + g_byte_array_free(log->content, /* free_segment */ true); + log->content = NULL; +} + +static void tpm_log_finalize(Object *obj) +{ + tpm_log_destroy(TPM_LOG(obj)); +} + +static int tpm_log_get_digest_algo(Object *obj, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + + return log->digest_algo; +} + +static void tpm_log_set_digest_algo(Object *obj, int algo, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + + if (log->content != NULL) { + error_setg(errp, "cannot set digest algo after log creation"); + return; + } + + log->digest_algo = algo; +} + +static void tpm_log_get_max_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + uint64_t value = log->max_size; + + visit_type_uint64(v, name, &value, errp); +} + +static void tpm_log_get_load_addr(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + uint64_t value = log->load_addr; + + visit_type_uint64(v, name, &value, errp); +} + +static void tpm_log_set_load_addr(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + TpmLog *log = TPM_LOG(obj); + uint64_t value; + + if (!visit_type_uint64(v, name, &value, errp)) { + return; + } + + log->load_addr = value; +} + + +static void tpm_log_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add_enum(oc, "digest-algo", + "TpmLogDigestAlgo", + &TpmLogDigestAlgo_lookup, + tpm_log_get_digest_algo, + 
tpm_log_set_digest_algo); + object_class_property_set_description(oc, "digest-algo", + "Algorithm used to hash blobs added as events ('sha256', 'sha512')"); + + /* max_size is set while allocating the log in tpm_log_create */ + object_class_property_add(oc, "max-size", "uint64", tpm_log_get_max_size, + NULL, NULL, NULL); + object_class_property_set_description(oc, "max-size", + "Maximum size of the log, reserved in guest memory"); + + object_class_property_add(oc, "load-addr", "uint64", tpm_log_get_load_addr, + tpm_log_set_load_addr, NULL, NULL); + object_class_property_set_description(oc, "load-addr", + "Base address of the log in guest memory"); +} + +int tpm_log_create(TpmLog *log, size_t max_size, Error **errp) +{ + struct TcgEfiSpecIdEvent event; + struct TcgPcClientPcrEvent header = { + .pcr_index = 0, + .event_type = cpu_to_le32(TCG_EV_NO_ACTION), + .digest = {0}, + .event_data_size = cpu_to_le32(sizeof(event)), + }; + + log->content = g_byte_array_sized_new(max_size); + log->max_size = max_size; + + switch (log->digest_algo) { + case TPM_LOG_DIGEST_ALGO_SHA256: + log->tcg_algo = TCG_ALG_SHA256; + log->digest_size = TCG_ALG_SHA256_DIGEST_SIZE; + break; + case TPM_LOG_DIGEST_ALGO_SHA512: + log->tcg_algo = TCG_ALG_SHA512; + log->digest_size = TCG_ALG_SHA512_DIGEST_SIZE; + break; + default: + g_assert_not_reached(); + } + + log->digest = g_malloc0(log->digest_size); + + event = (struct TcgEfiSpecIdEvent) { + .signature = "Spec ID Event03", + .platform_class = 0, + .family_version_minor = 0, + .family_version_major = 2, + .spec_revision = 106, + .uintn_size = 2, /* UINT64 */ + .number_of_algorithms = cpu_to_le32(1), + .algorithm_id = cpu_to_le16(log->tcg_algo), + .digest_size = cpu_to_le16(log->digest_size), + .vendor_info_size = 0, + }; + + g_byte_array_append(log->content, (guint8 *)&header, sizeof(header)); + g_byte_array_append(log->content, (guint8 *)&event, sizeof(event)); + return 0; +} + +int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event, + size_t event_size, const uint8_t *data, size_t data_size, + Error **errp) +{ + int digests = 0; + size_t rollback_len; + struct TcgPcrEvent2Head header = { + .pcr_index = 0, + .event_type = cpu_to_le32(event_type), + }; + struct TpmlDigestValues digest_header = {0}; + struct TcgPcrEvent2Tail tail = { + .event_size = cpu_to_le32(event_size), + }; + + if (log->content == NULL) { + error_setg(errp, "event log is not initialized"); + return -EINVAL; + } + rollback_len = log->content->len; + + g_byte_array_append(log->content, (guint8 *)&header, sizeof(header)); + + if (data) { + QCryptoHashAlgorithm qc_algo; + + digest_header.hash_alg = cpu_to_le16(log->tcg_algo); + switch (log->digest_algo) { + case TPM_LOG_DIGEST_ALGO_SHA256: + qc_algo = QCRYPTO_HASH_ALG_SHA256; + break; + case TPM_LOG_DIGEST_ALGO_SHA512: + qc_algo = QCRYPTO_HASH_ALG_SHA512; + break; + default: + g_assert_not_reached(); + } + if (qcrypto_hash_bytes(qc_algo, (const char *)data, data_size, + &log->digest, &log->digest_size, errp)) { + goto err_rollback; + } + digests = 1; + } else if (event_type == TCG_EV_NO_ACTION) { + /* EV_NO_ACTION contains empty digests for each supported algo */ + memset(log->digest, 0, log->digest_size); + digest_header.hash_alg = 0; + digests = 1; + } + + if (digests) { + digest_header.count = cpu_to_le32(digests); + g_byte_array_append(log->content, (guint8 *)&digest_header, + sizeof(digest_header)); + g_byte_array_append(log->content, log->digest, log->digest_size); + } else { + /* Add an empty digests list */ + 
g_byte_array_append(log->content, (guint8 *)&digest_header.count,
+                            sizeof(digest_header.count));
+    }
+
+    g_byte_array_append(log->content, (guint8 *)&tail, sizeof(tail));
+    g_byte_array_append(log->content, event, event_size);
+
+    if (log->content->len > log->max_size) {
+        error_setg(errp, "event log exceeds max size");
+        goto err_rollback;
+    }
+
+    return 0;
+
+err_rollback:
+    g_byte_array_set_size(log->content, rollback_len);
+    return -1;
+}
+
+int tpm_log_write_and_close(TpmLog *log, Error **errp)
+{
+    int ret;
+
+    if (!log->content) {
+        error_setg(errp, "event log is not initialized");
+        return -1;
+    }
+
+    ret = address_space_write_rom(&address_space_memory, log->load_addr,
+                                  MEMTXATTRS_UNSPECIFIED, log->content->data,
+                                  log->content->len);
+    if (ret) {
+        error_setg(errp, "cannot write event log to guest memory");
+        return -1;
+    }
+
+    tpm_log_destroy(log);
+    return ret;
+}
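For reference, a minimal usage sketch of the TpmLog API added above, roughly
what the Realm measurement-log code elsewhere in this series might do.
TYPE_TPM_LOG and the TCG_EV_* event-type constants are assumed to come from
hw/tpm/tpm_log.h, which is not shown here; blob, blob_size and log_paddr are
placeholders:

    Error *err = NULL;
    TpmLog *log = TPM_LOG(object_new(TYPE_TPM_LOG));

    /* The digest algorithm must be chosen before the log is created */
    object_property_set_str(OBJECT(log), "digest-algo", "sha256", &error_fatal);
    tpm_log_create(log, 64 * KiB, &error_fatal);

    /* Hash a blob that was loaded into guest memory, then append the event */
    if (tpm_log_add_event(log, TCG_EV_POST_CODE, (const uint8_t *)"kernel",
                          sizeof("kernel"), blob, blob_size, &err)) {
        error_report_err(err);
    }

    /* Once the guest address is known, flush the log into guest RAM */
    object_property_set_uint(OBJECT(log), "load-addr", log_paddr, &error_fatal);
    tpm_log_write_and_close(log, &error_fatal);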
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0be63c5fbc574b040ed797cb6411243807f1e537..c0bc61fdeef86e882a3ccc3d5fa3bfe3e3181cd5 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -350,12 +350,9 @@ out:
     rcu_read_unlock();
 }
 
-static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
-                                            MemoryRegionSection *section)
+static int vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer,
+                                                   MemoryRegionSection *section)
 {
-    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
-                                                listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
     const hwaddr size = int128_get64(section->size);
     const hwaddr iova = section->offset_within_address_space;
     int ret;
@@ -366,14 +363,32 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
         error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
                      strerror(-ret));
     }
+
+    return ret;
 }
 
-static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
-                                            MemoryRegionSection *section)
+static int vfio_ram_discard_notify_discard(StateChangeListener *scl,
+                                           MemoryRegionSection *section)
 {
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
+
+    return vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section);
+}
+
+static int vfio_private_shared_notify_to_private(StateChangeListener *scl,
+                                                 MemoryRegionSection *section)
+{
+    PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
+    VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener,
+                                                   listener);
+
+    return vfio_state_change_notify_to_state_clear(vpsl->bcontainer, section);
+}
+
+static int vfio_state_change_notify_to_state_set(VFIOContainerBase *bcontainer,
+                                                 MemoryRegionSection *section,
+                                                 uint64_t granularity)
+{
     const hwaddr end = section->offset_within_region +
                        int128_get64(section->size);
     hwaddr start, next, iova;
@@ -385,7 +400,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
      * unmap in minimum granularity later.
      */
     for (start = section->offset_within_region; start < end; start = next) {
-        next = ROUND_UP(start + 1, vrdl->granularity);
+        next = ROUND_UP(start + 1, granularity);
         next = MIN(next, end);
 
         iova = start - section->offset_within_region +
@@ -396,18 +411,39 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                                vaddr, section->readonly);
         if (ret) {
             /* Rollback */
-            vfio_ram_discard_notify_discard(rdl, section);
+            vfio_state_change_notify_to_state_clear(bcontainer, section);
             return ret;
         }
     }
     return 0;
 }
 
+static int vfio_ram_discard_notify_populate(StateChangeListener *scl,
+                                            MemoryRegionSection *section)
+{
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+
+    return vfio_state_change_notify_to_state_set(vrdl->bcontainer, section,
+                                                 vrdl->granularity);
+}
+
+static int vfio_private_shared_notify_to_shared(StateChangeListener *scl,
+                                                MemoryRegionSection *section)
+{
+    PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
+    VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener,
+                                                   listener);
+
+    return vfio_state_change_notify_to_state_set(vpsl->bcontainer, section,
+                                                 vpsl->granularity);
+}
+
 static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                MemoryRegionSection *section)
 {
-    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
     VFIORamDiscardListener *vrdl;
+    RamDiscardListener *rdl;
 
     /* Ignore some corner cases not relevant in practice. */
     g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
@@ -420,17 +456,18 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
     vrdl->mr = section->mr;
     vrdl->offset_within_address_space = section->offset_within_address_space;
     vrdl->size = int128_get64(section->size);
-    vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
-                                                                section->mr);
+    vrdl->granularity = generic_state_manager_get_min_granularity(gsm,
+                                                                  section->mr);
 
     g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
     g_assert(bcontainer->pgsizes &&
             vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes));
 
-    ram_discard_listener_init(&vrdl->listener,
+    rdl = &vrdl->listener;
+    ram_discard_listener_init(rdl,
                              vfio_ram_discard_notify_populate,
                              vfio_ram_discard_notify_discard, true);
-    ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
+    generic_state_manager_register_listener(gsm, &rdl->scl, section);
     QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next);
 
     /*
@@ -477,11 +514,34 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
     }
 }
 
+static void vfio_register_private_shared_listener(VFIOContainerBase *bcontainer,
+                                                  MemoryRegionSection *section)
+{
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+    VFIOPrivateSharedListener *vpsl;
+    PrivateSharedListener *psl;
+
+    vpsl = g_new0(VFIOPrivateSharedListener, 1);
+    vpsl->bcontainer = bcontainer;
+    vpsl->mr = section->mr;
+    vpsl->offset_within_address_space = section->offset_within_address_space;
+    vpsl->granularity = generic_state_manager_get_min_granularity(gsm,
+                                                                  section->mr);
+
+    psl = &vpsl->listener;
+    private_shared_listener_init(psl, vfio_private_shared_notify_to_shared,
+                                 vfio_private_shared_notify_to_private,
+                                 PRIVATE_SHARED_LISTENER_PRIORITY_COMMON);
+    generic_state_manager_register_listener(gsm, &psl->scl, 
section);
+    QLIST_INSERT_HEAD(&bcontainer->vpsl_list, vpsl, next);
+}
+
 static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                  MemoryRegionSection *section)
 {
-    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
     VFIORamDiscardListener *vrdl = NULL;
+    RamDiscardListener *rdl;
 
     QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
         if (vrdl->mr == section->mr &&
@@ -495,11 +555,37 @@ static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
         hw_error("vfio: Trying to unregister missing RAM discard listener");
     }
 
-    ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
+    rdl = &vrdl->listener;
+    generic_state_manager_unregister_listener(gsm, &rdl->scl);
     QLIST_REMOVE(vrdl, next);
     g_free(vrdl);
 }
 
+static void vfio_unregister_private_shared_listener(VFIOContainerBase *bcontainer,
+                                                    MemoryRegionSection *section)
+{
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+    VFIOPrivateSharedListener *vpsl = NULL;
+    PrivateSharedListener *psl;
+
+    QLIST_FOREACH(vpsl, &bcontainer->vpsl_list, next) {
+        if (vpsl->mr == section->mr &&
+            vpsl->offset_within_address_space ==
+            section->offset_within_address_space) {
+            break;
+        }
+    }
+
+    if (!vpsl) {
+        hw_error("vfio: Trying to unregister missing private/shared listener");
+    }
+
+    psl = &vpsl->listener;
+    generic_state_manager_unregister_listener(gsm, &psl->scl);
+    QLIST_REMOVE(vpsl, next);
+    g_free(vpsl);
+}
+
 static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
 {
     MemoryRegion *mr = section->mr;
@@ -671,6 +757,9 @@ static void vfio_listener_region_add(MemoryListener *listener,
     if (memory_region_has_ram_discard_manager(section->mr)) {
         vfio_register_ram_discard_listener(bcontainer, section);
         return;
+    } else if (memory_region_has_private_shared_manager(section->mr)) {
+        vfio_register_private_shared_listener(bcontainer, section);
+        return;
     }
 
     vaddr = memory_region_get_ram_ptr(section->mr) +
@@ -790,6 +879,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
         vfio_unregister_ram_discard_listener(bcontainer, section);
         /* Unregistering will trigger an unmap. */
         try_unmap = false;
+    } else if (memory_region_has_private_shared_manager(section->mr)) {
+        vfio_unregister_private_shared_listener(bcontainer, section);
+        /* Unregistering will trigger an unmap. */
+        try_unmap = false;
     }
 
     if (try_unmap) {
@@ -1275,7 +1368,7 @@ static int
 vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
                                             MemoryRegionSection *section)
 {
-    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
     VFIORamDiscardListener *vrdl = NULL;
 
     QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
@@ -1294,7 +1387,7 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
      * We only want/can synchronize the bitmap for actually mapped parts -
     * which correspond to populated parts. Replay all populated parts.
*/ - return ram_discard_manager_replay_populated(rdm, section, + return generic_state_manager_replay_on_state_set(gsm, section, vfio_ram_discard_get_dirty_bitmap, &vrdl); } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 913ae49077c4f09b7b27517c1231cfbe4befb7fb..a356ae91a9d9884baf3b59d1797a2270f3ae3c7c 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -82,6 +82,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, bcontainer->iova_ranges = NULL; QLIST_INIT(&bcontainer->giommu_list); QLIST_INIT(&bcontainer->vrdl_list); + QLIST_INIT(&bcontainer->vpsl_list); } void vfio_container_destroy(VFIOContainerBase *bcontainer) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 75ee38aa46b9d3de63626329ab2ec0f6bb5f18d5..d60bc994ad212ab0d4dfc439bc147154e8f8fd65 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -235,28 +235,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, return ret; } -/* - * Adjust the memory section to cover the intersection with the given range. - * - * Returns false if the intersection is empty, otherwise returns true. - */ -static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, - uint64_t offset, uint64_t size) -{ - uint64_t start = MAX(s->offset_within_region, offset); - uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), - offset + size); - - if (end <= start) { - return false; - } - - s->offset_within_address_space += start - s->offset_within_region; - s->offset_within_region = start; - s->size = int128_make64(end - start); - return true; -} - typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, @@ -278,7 +256,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -310,7 +288,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -325,16 +303,16 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) { - RamDiscardListener *rdl = arg; + StateChangeListener *scl = arg; - return rdl->notify_populate(rdl, s); + return scl->notify_to_state_set(scl, s); } static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg) { - RamDiscardListener *rdl = arg; + StateChangeListener *scl = arg; - rdl->notify_discard(rdl, s); + scl->notify_to_state_clear(scl, s); return 0; } @@ -344,12 +322,13 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, RamDiscardListener *rdl; QLIST_FOREACH(rdl, &vmem->rdl_list, next) { - MemoryRegionSection tmp = *rdl->section; + StateChangeListener *scl = &rdl->scl; + MemoryRegionSection tmp = *scl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } - rdl->notify_discard(rdl, &tmp); + scl->notify_to_state_clear(scl, &tmp); } } @@ -360,12 +339,13 @@ static int 
virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, int ret = 0; QLIST_FOREACH(rdl, &vmem->rdl_list, next) { - MemoryRegionSection tmp = *rdl->section; + StateChangeListener *scl = &rdl->scl; + MemoryRegionSection tmp = *scl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } - ret = rdl->notify_populate(rdl, &tmp); + ret = scl->notify_to_state_set(scl, &tmp); if (ret) { break; } @@ -374,15 +354,16 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, if (ret) { /* Notify all already-notified listeners. */ QLIST_FOREACH(rdl2, &vmem->rdl_list, next) { - MemoryRegionSection tmp = *rdl2->section; + StateChangeListener *scl2 = &rdl2->scl; + MemoryRegionSection tmp = *scl2->section; if (rdl2 == rdl) { break; } - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } - rdl2->notify_discard(rdl2, &tmp); + scl2->notify_to_state_clear(scl2, &tmp); } } return ret; @@ -397,10 +378,11 @@ static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) } QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + StateChangeListener *scl = &rdl->scl; if (rdl->double_discard_supported) { - rdl->notify_discard(rdl, rdl->section); + scl->notify_to_state_clear(scl, scl->section); } else { - virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_for_each_plugged_section(vmem, scl->section, scl, virtio_mem_notify_discard_cb); } } @@ -1071,6 +1053,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) return; } + /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + if (memory_region_set_generic_state_manager(&vmem->memdev->mr, + GENERIC_STATE_MANAGER(vmem))) { + error_setg(errp, "Failed to set RamDiscardManager"); + ram_block_coordinated_discard_require(false); + return; + } + /* * We don't know at this point whether shared RAM is migrated using * QEMU or migrated using the file content. "x-ignore-shared" will be @@ -1125,13 +1118,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) &vmstate_virtio_mem_device_early, vmem); } qemu_register_reset(virtio_mem_system_reset, vmem); - - /* - * Set ourselves as RamDiscardManager before the plug handler maps the - * memory region and exposes it via an address space. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, - RAM_DISCARD_MANAGER(vmem)); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -1139,11 +1125,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - /* - * The unplug handler unmapped the memory region, it cannot be - * found via an address space anymore. Unset ourselves. - */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); qemu_unregister_reset(virtio_mem_system_reset, vmem); if (vmem->early_migration) { vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early, @@ -1154,6 +1135,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. 
+ */ + memory_region_set_generic_state_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); } @@ -1202,7 +1188,8 @@ static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem) * into an address space. Replay, now that we updated the bitmap. */ QLIST_FOREACH(rdl, &vmem->rdl_list, next) { - ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + StateChangeListener *scl = &rdl->scl; + ret = virtio_mem_for_each_plugged_section(vmem, scl->section, scl, virtio_mem_notify_populate_cb); if (ret) { return ret; @@ -1701,19 +1688,19 @@ static Property virtio_mem_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, +static uint64_t virtio_mem_rdm_get_min_granularity(const GenericStateManager *gsm, const MemoryRegion *mr) { - const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + const VirtIOMEM *vmem = VIRTIO_MEM(gsm); g_assert(mr == &vmem->memdev->mr); return vmem->block_size; } -static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, +static bool virtio_mem_rdm_is_populated(const GenericStateManager *gsm, const MemoryRegionSection *s) { - const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + const VirtIOMEM *vmem = VIRTIO_MEM(gsm); uint64_t start_gpa = vmem->addr + s->offset_within_region; uint64_t end_gpa = start_gpa + int128_get64(s->size); @@ -1730,7 +1717,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, } struct VirtIOMEMReplayData { - void *fn; + ReplayStateChange fn; void *opaque; }; @@ -1738,15 +1725,15 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) { struct VirtIOMEMReplayData *data = arg; - return ((ReplayRamPopulate)data->fn)(s, data->opaque); + return data->fn(s, data->opaque); } -static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, +static int virtio_mem_rdm_replay_populated(const GenericStateManager *gsm, MemoryRegionSection *s, - ReplayRamPopulate replay_fn, + ReplayStateChange replay_fn, void *opaque) { - const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + const VirtIOMEM *vmem = VIRTIO_MEM(gsm); struct VirtIOMEMReplayData data = { .fn = replay_fn, .opaque = opaque, @@ -1762,38 +1749,39 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, { struct VirtIOMEMReplayData *data = arg; - ((ReplayRamDiscard)data->fn)(s, data->opaque); + data->fn(s, data->opaque); return 0; } -static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *s, - ReplayRamDiscard replay_fn, - void *opaque) +static int virtio_mem_rdm_replay_discarded(const GenericStateManager *gsm, + MemoryRegionSection *s, + ReplayStateChange replay_fn, + void *opaque) { - const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + const VirtIOMEM *vmem = VIRTIO_MEM(gsm); struct VirtIOMEMReplayData data = { .fn = replay_fn, .opaque = opaque, }; g_assert(s->mr == &vmem->memdev->mr); - virtio_mem_for_each_unplugged_section(vmem, s, &data, - virtio_mem_rdm_replay_discarded_cb); + return virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); } -static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, - RamDiscardListener *rdl, +static void virtio_mem_rdm_register_listener(GenericStateManager *gsm, + StateChangeListener *scl, MemoryRegionSection *s) { - VirtIOMEM *vmem = VIRTIO_MEM(rdm); + VirtIOMEM *vmem = VIRTIO_MEM(gsm); + RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); int ret; g_assert(s->mr == &vmem->memdev->mr); - rdl->section = 
memory_region_section_new_copy(s); + scl->section = memory_region_section_new_copy(s); QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next); - ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + ret = virtio_mem_for_each_plugged_section(vmem, scl->section, scl, virtio_mem_notify_populate_cb); if (ret) { error_report("%s: Replaying plugged ranges failed: %s", __func__, @@ -1801,23 +1789,24 @@ static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, } } -static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, - RamDiscardListener *rdl) +static void virtio_mem_rdm_unregister_listener(GenericStateManager *gsm, + StateChangeListener *scl) { - VirtIOMEM *vmem = VIRTIO_MEM(rdm); + VirtIOMEM *vmem = VIRTIO_MEM(gsm); + RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl); - g_assert(rdl->section->mr == &vmem->memdev->mr); + g_assert(scl->section->mr == &vmem->memdev->mr); if (vmem->size) { if (rdl->double_discard_supported) { - rdl->notify_discard(rdl, rdl->section); + scl->notify_to_state_clear(scl, scl->section); } else { - virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_for_each_plugged_section(vmem, scl->section, scl, virtio_mem_notify_discard_cb); } } - memory_region_section_free_copy(rdl->section); - rdl->section = NULL; + memory_region_section_free_copy(scl->section); + scl->section = NULL; QLIST_REMOVE(rdl, next); } @@ -1850,7 +1839,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_CLASS(klass); device_class_set_props(dc, virtio_mem_properties); dc->vmsd = &vmstate_virtio_mem; @@ -1871,12 +1860,12 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; vmc->unplug_request_check = virtio_mem_unplug_request_check; - rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; - rdmc->is_populated = virtio_mem_rdm_is_populated; - rdmc->replay_populated = virtio_mem_rdm_replay_populated; - rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; - rdmc->register_listener = virtio_mem_rdm_register_listener; - rdmc->unregister_listener = virtio_mem_rdm_unregister_listener; + gsmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; + gsmc->is_state_set = virtio_mem_rdm_is_populated; + gsmc->replay_on_state_set = virtio_mem_rdm_replay_populated; + gsmc->replay_on_state_clear = virtio_mem_rdm_replay_discarded; + gsmc->register_listener = virtio_mem_rdm_register_listener; + gsmc->unregister_listener = virtio_mem_rdm_unregister_listener; } static const TypeInfo virtio_mem_info = { diff --git a/include/exec/memory.h b/include/exec/memory.h index 0361ec20547bfa5fa9d856a7f2d4bbc23e06443f..51fe10d4a00f7c48168d8479c2d28c47b3cd7d1a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -43,12 +43,24 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass; DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass, IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION) +#define TYPE_GENERIC_STATE_MANAGER "generic-state-manager" +typedef struct GenericStateManagerClass GenericStateManagerClass; +typedef struct GenericStateManager GenericStateManager; +DECLARE_OBJ_CHECKERS(GenericStateManager, GenericStateManagerClass, + 
GENERIC_STATE_MANAGER, TYPE_GENERIC_STATE_MANAGER)
+
 #define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager"
 typedef struct RamDiscardManagerClass RamDiscardManagerClass;
 typedef struct RamDiscardManager RamDiscardManager;
 DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass,
                      RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER);
 
+#define TYPE_PRIVATE_SHARED_MANAGER "private-shared-manager"
+typedef struct PrivateSharedManagerClass PrivateSharedManagerClass;
+typedef struct PrivateSharedManager PrivateSharedManager;
+DECLARE_OBJ_CHECKERS(PrivateSharedManager, PrivateSharedManagerClass,
+                     PRIVATE_SHARED_MANAGER, TYPE_PRIVATE_SHARED_MANAGER)
+
 #ifdef CONFIG_FUZZ
 void fuzz_dma_read_cb(size_t addr,
                       size_t len,
@@ -563,104 +575,59 @@ struct IOMMUMemoryRegionClass {
                                      Error **errp);
 };
 
-typedef struct RamDiscardListener RamDiscardListener;
-typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl,
-                                 MemoryRegionSection *section);
-typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl,
-                                 MemoryRegionSection *section);
+typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque);
 
-struct RamDiscardListener {
+typedef struct StateChangeListener StateChangeListener;
+typedef int (*NotifyStateSet)(StateChangeListener *scl,
+                              MemoryRegionSection *section);
+typedef int (*NotifyStateClear)(StateChangeListener *scl,
+                                MemoryRegionSection *section);
+
+struct StateChangeListener {
     /*
-     * @notify_populate:
+     * @notify_to_state_set:
      *
-     * Notification that previously discarded memory is about to get populated.
-     * Listeners are able to object. If any listener objects, already
-     * successfully notified listeners are notified about a discard again.
+     * Notification that a previously state-clear part is about to be set.
      *
-     * @rdl: the #RamDiscardListener getting notified
-     * @section: the #MemoryRegionSection to get populated. The section
+     * @scl: the #StateChangeListener getting notified
+     * @section: the #MemoryRegionSection to be state-set. The section
      * is aligned within the memory region to the minimum granularity
      * unless it would exceed the registered section.
      *
      * Returns 0 on success. If the notification is rejected by the listener,
      * an error is returned.
      */
-    NotifyRamPopulate notify_populate;
+    NotifyStateSet notify_to_state_set;
 
     /*
-     * @notify_discard:
+     * @notify_to_state_clear:
      *
-     * Notification that previously populated memory was discarded successfully
-     * and listeners should drop all references to such memory and prevent
-     * new population (e.g., unmap).
+     * Notification that a previously state-set part is about to be cleared.
      *
-     * @rdl: the #RamDiscardListener getting notified
-     * @section: the #MemoryRegionSection to get populated. The section
+     * @scl: the #StateChangeListener getting notified
+     * @section: the #MemoryRegionSection to be state-cleared. The section
      * is aligned within the memory region to the minimum granularity
      * unless it would exceed the registered section.
-     */
-    NotifyRamDiscard notify_discard;
-
-    /*
-     * @double_discard_supported:
      *
-     * The listener suppors getting @notify_discard notifications that span
-     * already discarded parts.
+     * Returns 0 on success. If the notification is rejected by the listener,
+     * an error is returned.
      */
-    bool double_discard_supported;
+    NotifyStateClear notify_to_state_clear;
 
     MemoryRegionSection *section;
-    QLIST_ENTRY(RamDiscardListener) next;
 };
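Note that the old inline initializer below is removed along with the
NotifyRamPopulate/NotifyRamDiscard typedefs, yet hw/vfio/common.c above still
calls ram_discard_listener_init() with the new callback types, and virtio-mem
still uses rdl->scl, rdl->double_discard_supported and the rdl_list linkage.
The series therefore presumably redefines RamDiscardListener around an
embedded StateChangeListener and reintroduces the initializer along these
lines (a sketch inferred from those call sites, not the actual patch text):

    struct RamDiscardListener {
        StateChangeListener scl;

        /*
         * The listener supports notify_to_state_clear notifications that
         * span already-cleared parts.
         */
        bool double_discard_supported;

        QLIST_ENTRY(RamDiscardListener) next;
    };

    static inline void ram_discard_listener_init(RamDiscardListener *rdl,
                                                 NotifyStateSet populate_fn,
                                                 NotifyStateClear discard_fn,
                                                 bool double_discard_supported)
    {
        rdl->scl.notify_to_state_set = populate_fn;
        rdl->scl.notify_to_state_clear = discard_fn;
        rdl->double_discard_supported = double_discard_supported;
    }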
-static inline void ram_discard_listener_init(RamDiscardListener *rdl,
-                                             NotifyRamPopulate populate_fn,
-                                             NotifyRamDiscard discard_fn,
-                                             bool double_discard_supported)
-{
-    rdl->notify_populate = populate_fn;
-    rdl->notify_discard = discard_fn;
-    rdl->double_discard_supported = double_discard_supported;
-}
-
-typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
-typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
-
 /*
- * RamDiscardManagerClass:
- *
- * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
- * regions are currently populated to be used/accessed by the VM, notifying
- * after parts were discarded (freeing up memory) and before parts will be
- * populated (consuming memory), to be used/accessed by the VM.
- *
- * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped into an address space yet (either directly
- * or via an alias); it cannot change while the #MemoryRegion is
- * mapped into an address space.
- *
- * The #RamDiscardManager is intended to be used by technologies that are
- * incompatible with discarding of RAM (e.g., VFIO, which may pin all
- * memory inside a #MemoryRegion), and require proper coordination to only
- * map the currently populated parts, to hinder parts that are expected to
- * remain discarded from silently getting populated and consuming memory.
- * Technologies that support discarding of RAM don't have to bother and can
- * simply map the whole #MemoryRegion.
+ * GenericStateManagerClass:
  *
- * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
- * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
- * Logically unplugging memory consists of discarding RAM. The VM agreed to not
- * access unplugged (discarded) memory - especially via DMA. virtio-mem will
- * properly coordinate with listeners before memory is plugged (populated),
- * and after memory is unplugged (discarded).
+ * A #GenericStateManager is a common interface used to manage the state of
+ * a #MemoryRegion. The managed state is one of a pair of opposite states,
+ * such as populated and discarded, or private and shared. The pair is
+ * abstracted as "set" and "clear" in the callbacks below; the actual states
+ * are defined by the implementation.
  *
- * Listeners are called in multiples of the minimum granularity (unless it
- * would exceed the registered range) and changes are aligned to the minimum
- * granularity within the #MemoryRegion. Listeners have to prepare for memory
- * becoming discarded in a different granularity than it was populated and the
- * other way around.
  */
-struct RamDiscardManagerClass {
+struct GenericStateManagerClass {
     /* private */
     InterfaceClass parent_class;
 
@@ -670,120 +637,229 @@ struct RamDiscardManagerClass {
     * @get_min_granularity:
     *
     * Get the minimum granularity in which listeners will get notified
-     * about changes within the #MemoryRegion via the #RamDiscardManager.
+     * about changes within the #MemoryRegion via the #GenericStateManager.
     *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
     * @mr: the #MemoryRegion
     *
     * Returns the minimum granularity.
     */
-    uint64_t (*get_min_granularity)(const RamDiscardManager *rdm,
+    uint64_t (*get_min_granularity)(const GenericStateManager *gsm,
                                     const MemoryRegion *mr);
 
     /**
-     * @is_populated:
+     * @is_state_set:
      *
-     * Check whether the given #MemoryRegionSection is completely populated
-     * (i.e., no parts are currently discarded) via the #RamDiscardManager.
-     * There are no alignment requirements.
+     * Check whether the state of the given #MemoryRegionSection is set
+     * via the #GenericStateManager.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @section: the #MemoryRegionSection
      *
-     * Returns whether the given range is completely populated.
+     * Returns whether the given range is completely set.
      */
-    bool (*is_populated)(const RamDiscardManager *rdm,
-                         const MemoryRegionSection *section);
+    bool (*is_state_set)(const GenericStateManager *gsm,
+                         const MemoryRegionSection *section);
 
     /**
-     * @replay_populated:
+     * @replay_on_state_set:
      *
-     * Call the #ReplayRamPopulate callback for all populated parts within the
-     * #MemoryRegionSection via the #RamDiscardManager.
+     * Call the #ReplayStateChange callback for all state-set parts within the
+     * #MemoryRegionSection via the #GenericStateManager.
      *
      * In case any call fails, no further calls are made.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @section: the #MemoryRegionSection
-     * @replay_fn: the #ReplayRamPopulate callback
+     * @replay_fn: the #ReplayStateChange callback
      * @opaque: pointer to forward to the callback
      *
      * Returns 0 on success, or a negative error if any notification failed.
      */
-    int (*replay_populated)(const RamDiscardManager *rdm,
-                            MemoryRegionSection *section,
-                            ReplayRamPopulate replay_fn, void *opaque);
+    int (*replay_on_state_set)(const GenericStateManager *gsm,
+                               MemoryRegionSection *section,
+                               ReplayStateChange replay_fn, void *opaque);
 
     /**
-     * @replay_discarded:
+     * @replay_on_state_clear:
+     *
+     * Call the #ReplayStateChange callback for all state-clear parts within
+     * the #MemoryRegionSection via the #GenericStateManager.
      *
-     * Call the #ReplayRamDiscard callback for all discarded parts within the
-     * #MemoryRegionSection via the #RamDiscardManager.
+     * In case any call fails, no further calls are made.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @section: the #MemoryRegionSection
-     * @replay_fn: the #ReplayRamDiscard callback
+     * @replay_fn: the #ReplayStateChange callback
      * @opaque: pointer to forward to the callback
+     *
+     * Returns 0 on success, or a negative error if any notification failed.
      */
-    void (*replay_discarded)(const RamDiscardManager *rdm,
-                             MemoryRegionSection *section,
-                             ReplayRamDiscard replay_fn, void *opaque);
+    int (*replay_on_state_clear)(const GenericStateManager *gsm,
+                                 MemoryRegionSection *section,
+                                 ReplayStateChange replay_fn, void *opaque);
 
     /**
     * @register_listener:
     *
-     * Register a #RamDiscardListener for the given #MemoryRegionSection and
-     * immediately notify the #RamDiscardListener about all populated parts
-     * within the #MemoryRegionSection via the #RamDiscardManager.
+     * Register a #StateChangeListener for the given #MemoryRegionSection and
+     * immediately notify the #StateChangeListener about all state-set parts
+     * within the #MemoryRegionSection via the #GenericStateManager.
     *
     * In case any notification fails, no further notifications are triggered
     * and an error is logged.
      *
-     * @rdm: the #RamDiscardManager
-     * @rdl: the #RamDiscardListener
+     * @gsm: the #GenericStateManager
+     * @scl: the #StateChangeListener
      * @section: the #MemoryRegionSection
      */
-    void (*register_listener)(RamDiscardManager *rdm,
-                              RamDiscardListener *rdl,
+    void (*register_listener)(GenericStateManager *gsm,
+                              StateChangeListener *scl,
                               MemoryRegionSection *section);
 
     /**
     * @unregister_listener:
     *
-     * Unregister a previously registered #RamDiscardListener via the
-     * #RamDiscardManager after notifying the #RamDiscardListener about all
-     * populated parts becoming unpopulated within the registered
+     * Unregister a previously registered #StateChangeListener via the
+     * #GenericStateManager after notifying the #StateChangeListener about all
+     * state-set parts becoming state-cleared within the registered
      * #MemoryRegionSection.
     *
-     * @rdm: the #RamDiscardManager
-     * @rdl: the #RamDiscardListener
+     * @gsm: the #GenericStateManager
+     * @scl: the #StateChangeListener
+     */
+    void (*unregister_listener)(GenericStateManager *gsm,
+                                StateChangeListener *scl);
+};
+
+uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm,
+                                                   const MemoryRegion *mr);
+
+bool generic_state_manager_is_state_set(const GenericStateManager *gsm,
+                                        const MemoryRegionSection *section);
+
+int generic_state_manager_replay_on_state_set(const GenericStateManager *gsm,
+                                              MemoryRegionSection *section,
+                                              ReplayStateChange replay_fn,
+                                              void *opaque);
+
+int generic_state_manager_replay_on_state_clear(const GenericStateManager *gsm,
+                                                MemoryRegionSection *section,
+                                                ReplayStateChange replay_fn,
+                                                void *opaque);
+
+void generic_state_manager_register_listener(GenericStateManager *gsm,
+                                             StateChangeListener *scl,
+                                             MemoryRegionSection *section);
+
+void generic_state_manager_unregister_listener(GenericStateManager *gsm,
+                                               StateChangeListener *scl);
+
+static inline void state_change_listener_init(StateChangeListener *scl,
+                                              NotifyStateSet state_set_fn,
+                                              NotifyStateClear state_clear_fn)
+{
+    scl->notify_to_state_set = state_set_fn;
+    scl->notify_to_state_clear = state_clear_fn;
+}
+
+typedef struct RamDiscardListener RamDiscardListener;
+
+struct RamDiscardListener {
+    struct StateChangeListener scl;
+
+    /*
+     * @double_discard_supported:
+     *
+     * The listener supports getting @notify_to_state_clear notifications
+     * that span already discarded parts.
     */
-    void (*unregister_listener)(RamDiscardManager *rdm,
-                                RamDiscardListener *rdl);
+    bool double_discard_supported;
+
+    QLIST_ENTRY(RamDiscardListener) next;
+};
+
+static inline void ram_discard_listener_init(RamDiscardListener *rdl,
+                                             NotifyStateSet populate_fn,
+                                             NotifyStateClear discard_fn,
+                                             bool double_discard_supported)
+{
+    state_change_listener_init(&rdl->scl, populate_fn, discard_fn);
+    rdl->double_discard_supported = double_discard_supported;
+}
+
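A sketch of the replay API declared above (my_count_state_set_cb and the calling context are invented for illustration): a caller can, for example, accumulate the number of state-set bytes within a section.

    /* Hypothetical replay callback: count state-set bytes (sketch only). */
    static int my_count_state_set_cb(MemoryRegionSection *section, void *opaque)
    {
        uint64_t *bytes = opaque;

        *bytes += int128_get64(section->size);
        return 0; /* a non-zero return would stop the replay */
    }

    /* ... given a GenericStateManager *gsm and a MemoryRegionSection section: */
    uint64_t bytes = 0;
    generic_state_manager_replay_on_state_set(gsm, &section,
                                              my_count_state_set_cb, &bytes);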
+/*
+ * RamDiscardManagerClass:
+ *
+ * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
+ * regions are currently populated to be used/accessed by the VM, notifying
+ * after parts were discarded (freeing up memory) and before parts will be
+ * populated (consuming memory), to be used/accessed by the VM.
+ *
+ * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
+ *
+ * The #RamDiscardManager is intended to be used by technologies that are
+ * incompatible with discarding of RAM (e.g., VFIO, which may pin all
+ * memory inside a #MemoryRegion), and require proper coordination to only
+ * map the currently populated parts, to hinder parts that are expected to
+ * remain discarded from silently getting populated and consuming memory.
+ * Technologies that support discarding of RAM don't have to bother and can
+ * simply map the whole #MemoryRegion.
+ *
+ * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
+ * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
+ * Logically unplugging memory consists of discarding RAM. The VM agreed to not
+ * access unplugged (discarded) memory - especially via DMA. virtio-mem will
+ * properly coordinate with listeners before memory is plugged (populated),
+ * and after memory is unplugged (discarded).
+ *
+ * Listeners are called in multiples of the minimum granularity (unless it
+ * would exceed the registered range) and changes are aligned to the minimum
+ * granularity within the #MemoryRegion. Listeners have to prepare for memory
+ * becoming discarded in a different granularity than it was populated and the
+ * other way around.
+ */
+struct RamDiscardManagerClass {
+    /* private */
+    GenericStateManagerClass parent_class;
 };
 
-uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
-                                                 const MemoryRegion *mr);
+#define PRIVATE_SHARED_LISTENER_PRIORITY_MIN 0
+#define PRIVATE_SHARED_LISTENER_PRIORITY_COMMON 10
 
-bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
-                                      const MemoryRegionSection *section);
+typedef struct PrivateSharedListener PrivateSharedListener;
+struct PrivateSharedListener {
+    struct StateChangeListener scl;
 
-int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
-                                         MemoryRegionSection *section,
-                                         ReplayRamPopulate replay_fn,
-                                         void *opaque);
+    /*
+     * @priority:
+     *
+     * Governs the order in which private-shared listeners are invoked. Lower
+     * priorities are invoked earlier.
+     * On failure, the priority also allows undoing the effects of previously
+     * invoked listeners in reverse order.
+     */
+    int priority;
 
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
-                                          MemoryRegionSection *section,
-                                          ReplayRamDiscard replay_fn,
-                                          void *opaque);
+    QTAILQ_ENTRY(PrivateSharedListener) next;
+};
 
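To illustrate the priority ordering (a sketch reusing the hypothetical callbacks from the earlier example and the init helper defined just below):

    PrivateSharedListener early, late;

    private_shared_listener_init(&early, my_notify_set, my_notify_clear,
                                 PRIVATE_SHARED_LISTENER_PRIORITY_MIN);
    private_shared_listener_init(&late, my_notify_set, my_notify_clear,
                                 PRIVATE_SHARED_LISTENER_PRIORITY_COMMON);
    /* on a state change, &early is notified before &late */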
-void ram_discard_manager_register_listener(RamDiscardManager *rdm,
-                                           RamDiscardListener *rdl,
-                                           MemoryRegionSection *section);
+struct PrivateSharedManagerClass {
+    /* private */
+    GenericStateManagerClass parent_class;
+};
 
-void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
-                                             RamDiscardListener *rdl);
+static inline void private_shared_listener_init(PrivateSharedListener *psl,
+                                                NotifyStateSet populate_fn,
+                                                NotifyStateClear discard_fn,
+                                                int priority)
+{
+    state_change_listener_init(&psl->scl, populate_fn, discard_fn);
+    psl->priority = priority;
+}
 
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
                           ram_addr_t *ram_addr, bool *read_only,
@@ -850,7 +926,7 @@ struct MemoryRegion {
     const char *name;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
-    RamDiscardManager *rdm; /* Only for RAM */
+    GenericStateManager *gsm; /* Only for RAM */
 
     /* For devices designed to perform re-entrant IO into their own IO MRs */
     bool disable_reentrancy_guard;
@@ -1272,6 +1348,33 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
 */
 void memory_region_section_free_copy(MemoryRegionSection *s);
 
+/**
+ * memory_region_section_intersect_range: Adjust the memory section to cover
+ * the intersection with the given range.
+ *
+ * @s: the #MemoryRegionSection to be adjusted
+ * @offset: the offset of the given range in the memory region
+ * @size: the size of the given range
+ *
+ * Returns false if the intersection is empty, otherwise returns true.
+ */
+static inline bool memory_region_section_intersect_range(MemoryRegionSection *s,
+                                                         uint64_t offset, uint64_t size)
+{
+    uint64_t start = MAX(s->offset_within_region, offset);
+    Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region), s->size),
+                            int128_add(int128_make64(offset), int128_make64(size)));
+
+    if (int128_le(end, int128_make64(start))) {
+        return false;
+    }
+
+    s->offset_within_address_space += start - s->offset_within_region;
+    s->offset_within_region = start;
+    s->size = int128_sub(end, int128_make64(start));
+    return true;
+}
+
 /**
  * memory_region_init: Initialize a memory region
  *
@@ -2501,14 +2604,28 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr);
 bool memory_region_is_mapped(MemoryRegion *mr);
 
 /**
- * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a
+ * memory_region_get_generic_state_manager: get the #GenericStateManager for a
+ * #MemoryRegion
+ *
+ * The #GenericStateManager cannot change while a memory region is mapped.
+ *
+ * @mr: the #MemoryRegion
+ */
+GenericStateManager *memory_region_get_generic_state_manager(MemoryRegion *mr);
+
+/**
+ * memory_region_set_generic_state_manager: set the #GenericStateManager for a
 * #MemoryRegion
 *
- * The #RamDiscardManager cannot change while a memory region is mapped.
+ * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
+ * that does not cover RAM, or a #MemoryRegion that already has a
+ * #GenericStateManager assigned. Returns 0 if the gsm is set successfully,
+ * or -EBUSY if a #GenericStateManager is already assigned.
* * @mr: the #MemoryRegion + * @gsm: #GenericStateManager to set */ -RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr); +int memory_region_set_generic_state_manager(MemoryRegion *mr, + GenericStateManager *gsm); /** * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a @@ -2516,24 +2633,15 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr); * * @mr: the #MemoryRegion */ -static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) -{ - return !!memory_region_get_ram_discard_manager(mr); -} +bool memory_region_has_ram_discard_manager(MemoryRegion *mr); /** - * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a - * #MemoryRegion - * - * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion - * that does not cover RAM, or a #MemoryRegion that already has a - * #RamDiscardManager assigned. + * memory_region_has_private_shared_manager: check whether a #MemoryRegion has a + * #PrivateSharedManager assigned * * @mr: the #MemoryRegion - * @rdm: #RamDiscardManager to set */ -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm); +bool memory_region_has_private_shared_manager(MemoryRegion *mr); /** * memory_region_find: translate an address/size relative to a diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h index 06ca1d90b222483ffb769ab3c83f6056f0ebbb11..8fed25706bf86356f987d9649bb483be333ffaef 100644 --- a/include/hw/arm/boot.h +++ b/include/hw/arm/boot.h @@ -133,9 +133,23 @@ struct arm_boot_info { bool secure_board_setup; arm_endianness endianness; + + /* Used when loading firmware into RAM */ hwaddr firmware_base; hwaddr firmware_max_size; + /* + * Instead of starting in a small bootloader that jumps to the kernel, + * immediately start in the kernel. + */ + bool skip_bootloader; + + /* + * Confidential guest boot loads everything into RAM so it can be measured. + */ bool confidential; + /* measurement log location in guest memory */ + hwaddr log_paddr; + size_t log_size; }; /** diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 3e2759d225066c96e9a408dda79e1520639c06f1..fee7c27e0c18c8810b6f86179d56d4ad4a52813c 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -225,7 +225,7 @@ struct VirtMachineState { bool cpu_hotplug_enabled; bool ras; bool mte; - bool dtb_randomness; + OnOffAuto dtb_randomness; bool pmu; int smmu_accel_count; OnOffAuto acpi; @@ -254,6 +254,7 @@ struct VirtMachineState { char *oem_table_id; char *kvm_type; NotifierList cpuhp_notifiers; + Object *event_log; }; #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) diff --git a/include/hw/loader.h b/include/hw/loader.h index 8685e27334f9c046f651a702bd27a92760c4d211..3a5212b8974eefa01f56d8ad31b9509612e27d84 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -356,6 +356,25 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); ssize_t rom_add_vga(const char *file); ssize_t rom_add_option(const char *file, int32_t bootindex); +typedef struct RomLoaderNotifyData { + /* Description of the loaded ROM */ + const char *name; + /* Blob */ + void *blob_ptr; + /* Address of the blob in guest memory */ + hwaddr addr; + /* Length of the blob */ + size_t len; +} RomLoaderNotifyData; + +/** + * rom_add_load_notifier - Add a notifier for loaded images + * + * Add a notifier that will be invoked with a RomLoaderNotifyData structure for + * each blob loaded into guest memory, after the blob is loaded. 
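+ *
+ * A hypothetical caller (sketch only) registers a statically allocated
+ * notifier whose notify() callback receives the RomLoaderNotifyData:
+ *
+ *     static Notifier my_rom_notifier = { .notify = my_rom_loaded_cb };
+ *     rom_add_load_notifier(&my_rom_notifier);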
+ */ +void rom_add_load_notifier(Notifier *notifier); + /* This is the usual maximum in uboot, so if a uImage overflows this, it would * overflow on real hardware too. */ #define UBOOT_MAX_GUNZIP_BYTES (64 << 20) diff --git a/include/hw/tpm/tpm_log.h b/include/hw/tpm/tpm_log.h new file mode 100644 index 0000000000000000000000000000000000000000..b3cd2e75636c6159d3dc4e535ecdb262f98f6b19 --- /dev/null +++ b/include/hw/tpm/tpm_log.h @@ -0,0 +1,89 @@ +#ifndef QEMU_TPM_LOG_H +#define QEMU_TPM_LOG_H + +#include "qom/object.h" +#include "sysemu/tpm.h" + +/* + * Defined in: TCG Algorithm Registry + * Family 2.0 Level 00 Revision 01.34 + * + * (Here TCG stands for Trusted Computing Group) + */ +#define TCG_ALG_SHA256 0xB +#define TCG_ALG_SHA512 0xD + +/* Size of a digest in bytes */ +#define TCG_ALG_SHA256_DIGEST_SIZE 32 +#define TCG_ALG_SHA512_DIGEST_SIZE 64 + +/* + * Defined in: TCG PC Client Platform Firmware Profile Specification + * Version 1.06 revision 52 + */ +#define TCG_EV_NO_ACTION 0x00000003 +#define TCG_EV_EVENT_TAG 0x00000006 +#define TCG_EV_POST_CODE2 0x00000013 +#define TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 0x8000000A + +struct UefiPlatformFirmwareBlob2Head { + uint8_t blob_description_size; + uint8_t blob_description[]; +} __attribute__((packed)); + +struct UefiPlatformFirmwareBlob2Tail { + uint64_t blob_base; + uint64_t blob_size; +} __attribute__((packed)); + +#define TYPE_TPM_LOG "tpm-log" + +OBJECT_DECLARE_SIMPLE_TYPE(TpmLog, TPM_LOG) + +/** + * tpm_log_create - Create the event log + * @log: the log object + * @max_size: maximum size of the log. Adding an event past that size will + * return an error + * @errp: pointer to a NULL-initialized error object + * + * Allocate the event log and create the initial entry (Spec ID Event03) + * describing the log format. + * + * Returns: 0 on success, -1 on error + */ +int tpm_log_create(TpmLog *log, size_t max_size, Error **errp); + +/** + * tpm_log_add_event - Append an event to the log + * @log: the log object + * @event_type: the `eventType` field in TCG_PCR_EVENT2 + * @event: the `event` field in TCG_PCR_EVENT2 + * @event_size: the `eventSize` field in TCG_PCR_EVENT2 + * @data: content to be hashed into the event digest. May be NULL. + * @data_size: size of @data. Should be zero when @data is NULL. + * @errp: pointer to a NULL-initialized error object + * + * Add a TCG_PCR_EVENT2 event to the event log. Depending on the event type, a + * data buffer may be hashed into the event digest (for example + * TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 contains a digest of the blob.) + * + * Returns: 0 on success, -1 on error + */ +int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event, + size_t event_size, const uint8_t *data, size_t data_size, + Error **errp); + +/** + * tpm_log_write_and_close - Move the log to guest memory + * @log: the log object + * @errp: pointer to a NULL-initialized error object + * + * Write the log into memory, at the address set in the load-addr property. + * After this operation, the log is not writable anymore. 
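+ *
+ * The expected lifecycle, per the functions documented above, is
+ * tpm_log_create(), any number of tpm_log_add_event() calls, then
+ * tpm_log_write_and_close().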
+ * + * Return: 0 on success, -1 on error + */ +int tpm_log_write_and_close(TpmLog *log, Error **errp); + +#endif diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 7a4c575115a74112bff1eb21890b2e7f7b1014e6..faed33bf92bc09091f2b54859280bd704a5ee56d 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -46,6 +46,7 @@ typedef struct VFIOContainerBase { bool dirty_pages_supported; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_HEAD(, VFIOPrivateSharedListener) vpsl_list; QLIST_ENTRY(VFIOContainerBase) next; QLIST_HEAD(, VFIODevice) device_list; GList *iova_ranges; @@ -69,6 +70,15 @@ typedef struct VFIORamDiscardListener { QLIST_ENTRY(VFIORamDiscardListener) next; } VFIORamDiscardListener; +typedef struct VFIOPrivateSharedListener { + VFIOContainerBase *bcontainer; + MemoryRegion *mr; + hwaddr offset_within_address_space; + uint64_t granularity; + PrivateSharedListener listener; + QLIST_ENTRY(VFIOPrivateSharedListener) next; +} VFIOPrivateSharedListener; + int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly); diff --git a/include/qom/object.h b/include/qom/object.h index afccd24ca7ab7197dc7169ba4394d3957657d1fa..f52ab216cdde6eadfe160fb9e70d171a0b4114cb 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -259,31 +259,23 @@ struct Object /** - * OBJECT_DEFINE_TYPE_EXTENDED: + * DO_OBJECT_DEFINE_TYPE_EXTENDED: * @ModuleObjName: the object name with initial caps * @module_obj_name: the object name in lowercase with underscore separators * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore * separators * @ABSTRACT: boolean flag to indicate whether the object can be instantiated + * @CLASS_SIZE: size of the type's class * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces * - * This macro is typically used in a source file, and will: - * - * - declare prototypes for _finalize, _class_init and _init methods - * - declare the TypeInfo struct instance - * - provide the constructor to register the type - * - * After using this macro, implementations of the _finalize, _class_init, - * and _init methods need to be written. Any of these can be zero-line - * no-op impls if no special logic is required for a given type. - * - * This macro should rarely be used, instead one of the more specialized - * macros is usually a better choice. + * This is the base macro used to implement all the OBJECT_DEFINE_* + * macros. It should never be used directly in a source file. */ -#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ - MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ - ABSTRACT, ...) \ +#define DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, \ + PARENT_MODULE_OBJ_NAME, \ + ABSTRACT, CLASS_SIZE, ...) 
\ static void \ module_obj_name##_finalize(Object *obj); \ static void \ @@ -298,7 +290,7 @@ struct Object .instance_align = __alignof__(ModuleObjName), \ .instance_init = module_obj_name##_init, \ .instance_finalize = module_obj_name##_finalize, \ - .class_size = sizeof(ModuleObjName##Class), \ + .class_size = CLASS_SIZE, \ .class_init = module_obj_name##_class_init, \ .abstract = ABSTRACT, \ .interfaces = (InterfaceInfo[]) { __VA_ARGS__ } , \ @@ -311,6 +303,37 @@ struct Object } \ type_init(module_obj_name##_register_types); +/** + * OBJECT_DEFINE_TYPE_EXTENDED: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * @ABSTRACT: boolean flag to indicate whether the object can be instantiated + * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces + * + * This macro is typically used in a source file, and will: + * + * - declare prototypes for _finalize, _class_init and _init methods + * - declare the TypeInfo struct instance + * - provide the constructor to register the type + * + * After using this macro, implementations of the _finalize, _class_init, + * and _init methods need to be written. Any of these can be zero-line + * no-op impls if no special logic is required for a given type. + * + * This macro should rarely be used, instead one of the more specialized + * macros is usually a better choice. + */ +#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + ABSTRACT, ...) \ + DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + ABSTRACT, sizeof(ModuleObjName##Class), \ + __VA_ARGS__) + /** * OBJECT_DEFINE_TYPE: * @ModuleObjName: the object name with initial caps @@ -368,6 +391,45 @@ struct Object MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ true, { NULL }) +/** + * OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * + * This is a variant of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable for + * the case of a non-abstract type, with interfaces, and with no requirement + * for a class struct. + */ +#define OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(ModuleObjName, \ + module_obj_name, \ + MODULE_OBJ_NAME, \ + PARENT_MODULE_OBJ_NAME, ...) \ + DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \ + false, 0, __VA_ARGS__) + +/** + * OBJECT_DEFINE_SIMPLE_TYPE: + * @ModuleObjName: the object name with initial caps + * @module_obj_name: the object name in lowercase with underscore separators + * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators + * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore + * separators + * + * This is a variant of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable for + * the common case of a non-abstract type, without any interfaces, and with + * no requirement for a class struct. 
If you declared your type with + * OBJECT_DECLARE_SIMPLE_TYPE then this is probably the right choice for + * defining it. + */ +#define OBJECT_DEFINE_SIMPLE_TYPE(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME) \ + OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(ModuleObjName, module_obj_name, \ + MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, { NULL }) + /** * struct TypeInfo: * @name: The name of the type. diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 098257e72f08dff9155d36fe39df1a5786c2fa7c..5f3f779de452a3674089996238d58384a3706a94 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -604,4 +604,7 @@ int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_star int kvm_create_shadow_device(PCIDevice *dev); int kvm_delete_shadow_device(PCIDevice *dev); #endif + +void kvm_mark_guest_state_protected(void); + #endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index b2d2c5947795868a2c5e003b3f7bd45c6ef4372a..9a7bc1a4b8b4d51baa86889c218267e369b0f799 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -87,6 +87,7 @@ struct KVMState bool kernel_irqchip_required; OnOffAuto kernel_irqchip_split; bool sync_mmu; + bool guest_state_protected; uint64_t manual_dirty_log_protect; /* The man page (and posix) say ioctl numbers are signed int, but * they're not. Linux, glibc and *BSD all treat ioctl numbers as diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 552fdcb18f290e4e7a2217ba03a8dc73a5463674..777b6688511051d150571265439486a0e5c95316 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -110,7 +110,8 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ -#define KVM_ARM_VCPU_TEC 8 /* VCPU TEC state as part of cvm */ +#define KVM_ARM_VCPU_REC 8 /* VCPU REC state as part of Realm */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 9 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -366,6 +367,7 @@ enum { KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0, }; +/* Vendor hyper call function numbers 0-63 */ #define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) enum { @@ -373,6 +375,14 @@ enum { KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1, }; +/* Vendor hyper call function numbers 64-127 */ +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3) + +enum { + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0, + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1, +}; + /* Device Control API on vm fd */ #define KVM_ARM_VM_SMCCC_CTRL 0 #define KVM_ARM_VM_SMCCC_FILTER 0 @@ -395,6 +405,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) @@ -407,6 +418,54 @@ enum { #define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3 #define KVM_DEV_ARM_ITS_CTRL_RESET 4 +/* KVM_CAP_ARM_RME on VM fd */ +#define KVM_CAP_ARM_RME_CONFIG_REALM 0 +#define KVM_CAP_ARM_RME_CREATE_REALM 1 +#define KVM_CAP_ARM_RME_INIT_RIPAS_REALM 2 +#define KVM_CAP_ARM_RME_POPULATE_REALM 3 +#define KVM_CAP_ARM_RME_ACTIVATE_REALM 4 + +/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */ +#define 
ARM_RME_CONFIG_RPV 0 +#define ARM_RME_CONFIG_HASH_ALGO 1 + +#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256 0 +#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512 1 + +#define ARM_RME_CONFIG_RPV_SIZE 64 + +struct arm_rme_config { + __u32 cfg; + union { + /* cfg == ARM_RME_CONFIG_RPV */ + struct { + __u8 rpv[ARM_RME_CONFIG_RPV_SIZE]; + }; + + /* cfg == ARM_RME_CONFIG_HASH_ALGO */ + struct { + __u32 hash_algo; + }; + + /* Fix the size of the union */ + __u8 reserved[256]; + }; +}; + +#define KVM_ARM_RME_POPULATE_FLAGS_MEASURE (1 << 0) +struct arm_rme_populate_realm { + __u64 base; + __u64 size; + __u32 flags; + __u32 reserved[3]; +}; + +struct arm_rme_init_ripas { + __u64 base; + __u64 size; + __u64 reserved[2]; +}; + /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index d3bf7fac004bde381bf0ed20947b5a8ffa9dc8b0..96bc60475e84fa21accd0c2ce517af237980af6b 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -924,14 +924,25 @@ struct kvm_ppc_resize_hpt { #define KVM_S390_SIE_PAGE_OFFSET 1 /* - * On arm64, machine type can be used to request the physical - * address size for the VM. Bits[7-0] are reserved for the guest - * PA size shift (i.e, log2(PA_Size)). For backward compatibility, - * value 0 implies the default IPA size, 40bits. + * On arm64, machine type can be used to request both the machine type and + * the physical address size for the VM. + * + * Bits[11-8] are reserved for the ARM specific machine type. + * + * Bits[7-0] are reserved for the guest PA size shift (i.e, log2(PA_Size)). + * For backward compatibility, value 0 implies the default IPA size, 40bits. */ +#define KVM_VM_TYPE_ARM_SHIFT 8 +#define KVM_VM_TYPE_ARM_MASK (0xfULL << KVM_VM_TYPE_ARM_SHIFT) +#define KVM_VM_TYPE_ARM(_type) \ + (((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK) +#define KVM_VM_TYPE_ARM_NORMAL KVM_VM_TYPE_ARM(0) +#define KVM_VM_TYPE_ARM_REALM KVM_VM_TYPE_ARM(1) + #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + /* * ioctls for /dev/kvm fds: */ @@ -1206,8 +1217,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 - -#define KVM_CAP_ARM_TMM 300 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_RME 300 #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 @@ -2451,4 +2462,11 @@ struct kvm_s390_zpci_op { #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t) #define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001 +/* Available with KVM_CAP_ARM_RME, only for VMs with KVM_VM_TYPE_ARM_REALM */ +struct kvm_arm_rmm_psci_complete { + __u64 target_mpidr; + __u32 psci_status; + __u32 padding[3]; +}; + #endif /* __LINUX_KVM_H */ diff --git a/migration/ram.c b/migration/ram.c index 91bec89a6e80d74712594809a1cbe9dada709c89..e6baecf14306c74b3d1a84e4372aeadeb4689e1e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -841,8 +841,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, return ret; } -static void dirty_bitmap_clear_section(MemoryRegionSection *section, - void *opaque) +static int dirty_bitmap_clear_section(MemoryRegionSection *section, + void *opaque) { const hwaddr offset = section->offset_within_region; const hwaddr size = int128_get64(section->size); @@ -861,6 +861,7 @@ static void 
dirty_bitmap_clear_section(MemoryRegionSection *section,
     }
     *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
     bitmap_clear(rb->bmap, start, npages);
+    return 0;
 }
 
 /*
@@ -881,14 +882,14 @@ static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
     uint64_t cleared_bits = 0;
 
     if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = 0,
             .size = int128_make64(qemu_ram_get_used_length(rb)),
         };
 
-        ram_discard_manager_replay_discarded(rdm, &section,
+        generic_state_manager_replay_on_state_clear(gsm, &section,
                                              dirty_bitmap_clear_section,
                                              &cleared_bits);
     }
@@ -904,14 +905,14 @@ static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
 bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
 {
     if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = start,
             .size = int128_make64(qemu_ram_pagesize(rb)),
         };
 
-        return !ram_discard_manager_is_populated(rdm, &section);
+        return !generic_state_manager_is_state_set(gsm, &section);
     }
     return false;
 }
@@ -1731,14 +1732,14 @@ static void ram_block_populate_read(RAMBlock *rb)
     * Note: The result is only stable while migrating (precopy/postcopy).
     */
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = rb->mr->size,
        };
 
-        ram_discard_manager_replay_populated(rdm, &section,
+        generic_state_manager_replay_on_state_set(gsm, &section,
                                             populate_read_section, NULL);
    } else {
        populate_read_range(rb, 0, rb->used_length);
@@ -1790,14 +1791,14 @@ static int ram_block_uffd_protect(RAMBlock *rb, int uffd_fd)
 
    /* See ram_block_populate_read() */
    if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
        MemoryRegionSection section = {
            .mr = rb->mr,
            .offset_within_region = 0,
            .size = rb->mr->size,
        };
 
-        return ram_discard_manager_replay_populated(rdm, &section,
+        return generic_state_manager_replay_on_state_set(gsm, &section,
                                                    uffd_protect_section,
                                                    (void *)(uintptr_t)uffd_fd);
    }
diff --git a/qapi/qom.json b/qapi/qom.json
index a5336e6b11accaf35668f3a7ea4f499486556603..e0590a6019dfbca03db02463d0170da3d953d70e 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -952,6 +952,45 @@
                '*kae': 'uint32',
                '*measurement-algo': 'TmmGuestMeasurementAlgo' } }
 
+##
+# @RmeGuestMeasurementAlgorithm:
+#
+# @sha256: Use the SHA256 algorithm
+#
+# @sha512: Use the SHA512 algorithm
+#
+# Algorithm to use for realm measurements
+#
+# Since: 10.0
+##
+{ 'enum': 'RmeGuestMeasurementAlgorithm',
+  'data': ['sha256', 'sha512'] }
+
+##
+# @RmeGuestProperties:
+#
+# Properties for rme-guest objects.
+#
+# @personalization-value: a base64 string encoding a 64-byte (512-bit) value.
+#     This optional parameter allows the VM instance to be uniquely
+#     identified during attestation.
(default: all-zero) +# +# @measurement-algorithm: Realm measurement algorithm +# (default: sha512) +# +# @measurement-log: Enable a measurement log for the Realm. All events +# that contribute to the Realm Initial Measurement (RIM) are added +# to a log in TCG TPM2 format, which is itself loaded into Realm +# memory (unmeasured) and can then be read by a verifier to +# reconstruct the RIM. +# +# Since: 10.0 +## +{ 'struct': 'RmeGuestProperties', + 'data': { '*personalization-value': 'str', + '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm', + '*measurement-log': 'bool'} } + ## # @ObjectType: # @@ -999,6 +1038,7 @@ { 'name': 'pr-manager-helper', 'if': 'CONFIG_LINUX' }, 'qtest', + 'rme-guest', 'rng-builtin', 'rng-egd', { 'name': 'rng-random', @@ -1069,6 +1109,7 @@ 'pr-manager-helper': { 'type': 'PrManagerHelperProperties', 'if': 'CONFIG_LINUX' }, 'qtest': 'QtestProperties', + 'rme-guest': 'RmeGuestProperties', 'rng-builtin': 'RngProperties', 'rng-egd': 'RngEgdProperties', 'rng-random': { 'type': 'RngRandomProperties', diff --git a/qapi/tpm.json b/qapi/tpm.json index a754455ca5581c1bdd9cb84d204d7664179e1a6b..a051d7bf5c5d8a9c80a02d7dd7d41549fcfaed5f 100644 --- a/qapi/tpm.json +++ b/qapi/tpm.json @@ -186,3 +186,17 @@ ## { 'command': 'query-tpm', 'returns': ['TPMInfo'], 'if': 'CONFIG_TPM' } + +## +# @TpmLogDigestAlgo: +# +# @sha256: Use the SHA256 algorithm +# +# @sha512: Use the SHA512 algorithm +# +# Algorithm to use for event log digests +# +# Since: 9.3 +## +{ 'enum': 'TpmLogDigestAlgo', + 'data': ['sha256', 'sha512'] } diff --git a/system/memory.c b/system/memory.c index 607ce9cf60c4117cd08e8a1ac6e0a33cfdf63127..fa990097013936c6e937a11923dc9e16165abbcf 100644 --- a/system/memory.c +++ b/system/memory.c @@ -2113,79 +2113,103 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) return imrc->num_indexes(iommu_mr); } -RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) +GenericStateManager *memory_region_get_generic_state_manager(MemoryRegion *mr) { if (!memory_region_is_ram(mr)) { return NULL; } - return mr->rdm; + return mr->gsm; } -void memory_region_set_ram_discard_manager(MemoryRegion *mr, - RamDiscardManager *rdm) +int memory_region_set_generic_state_manager(MemoryRegion *mr, + GenericStateManager *gsm) { g_assert(memory_region_is_ram(mr)); - g_assert(!rdm || !mr->rdm); - mr->rdm = rdm; + if (mr->gsm && gsm) { + return -EBUSY; + } + + mr->gsm = gsm; + return 0; +} + +bool memory_region_has_ram_discard_manager(MemoryRegion *mr) +{ + if (!memory_region_is_ram(mr) || + !object_dynamic_cast(OBJECT(mr->gsm), TYPE_RAM_DISCARD_MANAGER)) { + return false; + } + + return true; +} + +bool memory_region_has_private_shared_manager(MemoryRegion *mr) +{ + if (!memory_region_is_ram(mr) || + !object_dynamic_cast(OBJECT(mr->gsm), TYPE_PRIVATE_SHARED_MANAGER)) { + return false; + } + + return true; } -uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, - const MemoryRegion *mr) +uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm, + const MemoryRegion *mr) { - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); - g_assert(rdmc->get_min_granularity); - return rdmc->get_min_granularity(rdm, mr); + g_assert(gsmc->get_min_granularity); + return gsmc->get_min_granularity(gsm, mr); } -bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, - const MemoryRegionSection *section) +bool 
generic_state_manager_is_state_set(const GenericStateManager *gsm, + const MemoryRegionSection *section) { - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); - g_assert(rdmc->is_populated); - return rdmc->is_populated(rdm, section); + g_assert(gsmc->is_state_set); + return gsmc->is_state_set(gsm, section); } -int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamPopulate replay_fn, - void *opaque) +int generic_state_manager_replay_on_state_set(const GenericStateManager *gsm, + MemoryRegionSection *section, + ReplayStateChange replay_fn, + void *opaque) { - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); - g_assert(rdmc->replay_populated); - return rdmc->replay_populated(rdm, section, replay_fn, opaque); + g_assert(gsmc->replay_on_state_set); + return gsmc->replay_on_state_set(gsm, section, replay_fn, opaque); } -void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *section, - ReplayRamDiscard replay_fn, - void *opaque) +int generic_state_manager_replay_on_state_clear(const GenericStateManager *gsm, + MemoryRegionSection *section, + ReplayStateChange replay_fn, + void *opaque) { - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); - g_assert(rdmc->replay_discarded); - rdmc->replay_discarded(rdm, section, replay_fn, opaque); + g_assert(gsmc->replay_on_state_clear); + return gsmc->replay_on_state_clear(gsm, section, replay_fn, opaque); } -void ram_discard_manager_register_listener(RamDiscardManager *rdm, - RamDiscardListener *rdl, - MemoryRegionSection *section) +void generic_state_manager_register_listener(GenericStateManager *gsm, + StateChangeListener *scl, + MemoryRegionSection *section) { - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); - g_assert(rdmc->register_listener); - rdmc->register_listener(rdm, rdl, section); + g_assert(gsmc->register_listener); + gsmc->register_listener(gsm, scl, section); } -void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, - RamDiscardListener *rdl) +void generic_state_manager_unregister_listener(GenericStateManager *gsm, + StateChangeListener *scl) { - RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm); + GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm); - g_assert(rdmc->unregister_listener); - rdmc->unregister_listener(rdm, rdl); + g_assert(gsmc->unregister_listener); + gsmc->unregister_listener(gsm, scl); } /* Called with rcu_read_lock held. */ @@ -2212,7 +2236,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat); return false; } else if (memory_region_has_ram_discard_manager(mr)) { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); + GenericStateManager *gsm = memory_region_get_generic_state_manager(mr); MemoryRegionSection tmp = { .mr = mr, .offset_within_region = xlat, @@ -2227,7 +2251,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, * Disallow that. vmstate priorities make sure any RamDiscardManager * were already restored before IOMMUs are restored. 
*/ - if (!ram_discard_manager_is_populated(rdm, &tmp)) { + if (!generic_state_manager_is_state_set(gsm, &tmp)) { error_report("iommu map to discarded memory (e.g., unplugged via" " virtio-mem): %" HWADDR_PRIx "", iotlb->translated_addr); @@ -3733,17 +3757,32 @@ static const TypeInfo iommu_memory_region_info = { .abstract = true, }; -static const TypeInfo ram_discard_manager_info = { +static const TypeInfo generic_state_manager_info = { .parent = TYPE_INTERFACE, + .name = TYPE_GENERIC_STATE_MANAGER, + .class_size = sizeof(GenericStateManagerClass), + .abstract = true, +}; + +static const TypeInfo ram_discard_manager_info = { + .parent = TYPE_GENERIC_STATE_MANAGER, .name = TYPE_RAM_DISCARD_MANAGER, .class_size = sizeof(RamDiscardManagerClass), }; +static const TypeInfo private_shared_manager_info = { + .parent = TYPE_GENERIC_STATE_MANAGER, + .name = TYPE_PRIVATE_SHARED_MANAGER, + .class_size = sizeof(PrivateSharedManagerClass), +}; + static void memory_register_types(void) { type_register_static(&memory_region_info); type_register_static(&iommu_memory_region_info); + type_register_static(&generic_state_manager_info); type_register_static(&ram_discard_manager_info); + type_register_static(&private_shared_manager_info); } type_init(memory_register_types) diff --git a/system/memory_mapping.c b/system/memory_mapping.c index 6f884c5b90c9f899b06d10f68595ab2bb6214362..7bd8972b55bcaaed737ba93139cbb2980dacc290 100644 --- a/system/memory_mapping.c +++ b/system/memory_mapping.c @@ -270,10 +270,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, /* for special sparse regions, only add populated parts */ if (memory_region_has_ram_discard_manager(section->mr)) { - RamDiscardManager *rdm; - - rdm = memory_region_get_ram_discard_manager(section->mr); - ram_discard_manager_replay_populated(rdm, section, + GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr); + generic_state_manager_replay_on_state_set(gsm, section, guest_phys_ram_populate_cb, g); return; } diff --git a/target/arm/Kconfig b/target/arm/Kconfig index bf57d739cd133e81cdaa6cf26d4e4400718c659f..14977f1d83ddf4e799dd0231ca319c169fd9f3f4 100644 --- a/target/arm/Kconfig +++ b/target/arm/Kconfig @@ -9,3 +9,4 @@ config ARM config AARCH64 bool select ARM + select TPM_LOG if KVM diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index b53d5efe13d0d1276ccfcbc9f30df57139714f48..d201d319bdd3e39af4cca9f6e01c9b1be8e89daa 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -96,6 +96,7 @@ static const char *cpu_model_advertised_features[] = { "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", "kvm-no-adjvtime", "kvm-steal-time", "pauth", "pauth-impdef", "pauth-qarma3", + "num-breakpoints", "num-watchpoints", "num-pmu-counters", NULL }; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 09d391bd348aaf9c7e59a58d35487a1e68b540e6..3de2e1a3c30b037ed9109ab09d9488224a4f478c 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1082,6 +1082,11 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) const char *ns_status; bool sve; + if (cpu->kvm_rme) { + qemu_fprintf(f, "the CPU registers are confidential to the realm\n"); + return; + } + qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); for (i = 0; i < 32; i++) { if (i == 31) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a5ba7f2a2657dbe871b755f4afc2355eb57f5914..cb546a93e2e2091854fff98666ae6f44578a4553 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -976,6 +976,9 @@ struct ArchCPU 
{ bool kvm_sve_finalized; #endif /* CONFIG_KVM */ + /* Realm Management Extension */ + bool kvm_rme; + /* Uniprocessor system with MP extensions */ bool mp_is_up; @@ -1121,6 +1124,11 @@ struct ArchCPU { /* Generic timer counter frequency, in Hz */ uint64_t gt_cntfrq_hz; + + /* Allows to override the default configuration */ + uint8_t num_bps; + uint8_t num_wps; + int8_t num_pmu_ctrs; }; typedef struct ARMCPUInfo { @@ -2470,6 +2478,8 @@ FIELD(MFAR, FPA, 12, 40) FIELD(MFAR, NSE, 62, 1) FIELD(MFAR, NS, 63, 1) +FIELD(PMCR, N, 11, 5) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 6eca55ac29ff8b2a659fafc5571b8fb058dacd52..4cf8446b6e32fad93f971ab3ced54133502e8248 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -576,6 +576,123 @@ void aarch64_add_pauth_properties(Object *obj) } } +#if defined(CONFIG_KVM) +static void arm_cpu_get_num_wps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + + val = cpu->num_wps; + if (val == 0) { + val = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + } + + visit_type_uint8(v, name, &val, errp); +} + +static void arm_cpu_set_num_wps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + uint8_t max_wps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + + if (!visit_type_uint8(v, name, &val, errp)) { + return; + } + + if (val < 2 || val > max_wps) { + error_setg(errp, "invalid number of watchpoints"); + return; + } + + cpu->num_wps = val; +} + +static void arm_cpu_get_num_bps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + + val = cpu->num_bps; + if (val == 0) { + val = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + } + + visit_type_uint8(v, name, &val, errp); +} + +static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + uint8_t max_bps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + + if (!visit_type_uint8(v, name, &val, errp)) { + return; + } + + if (val < 2 || val > max_bps) { + error_setg(errp, "invalid number of breakpoints"); + return; + } + + cpu->num_bps = val; +} + +static void arm_cpu_get_num_pmu_ctrs(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + + if (cpu->num_pmu_ctrs == -1) { + val = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); + } else { + val = cpu->num_pmu_ctrs; + } + + visit_type_uint8(v, name, &val, errp); +} + +static void arm_cpu_set_num_pmu_ctrs(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + uint8_t val; + ARMCPU *cpu = ARM_CPU(obj); + uint8_t max_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); + + if (!visit_type_uint8(v, name, &val, errp)) { + return; + } + + if (val > max_ctrs) { + error_setg(errp, "invalid number of PMU counters"); + return; + } + + cpu->num_pmu_ctrs = val; +} + +static void aarch64_add_kvm_writable_properties(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps, + arm_cpu_set_num_bps, NULL, NULL); + object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps, + arm_cpu_set_num_wps, NULL, NULL); + + 
cpu->num_pmu_ctrs = -1; + object_property_add(obj, "num-pmu-counters", "uint8", + arm_cpu_get_num_pmu_ctrs, arm_cpu_set_num_pmu_ctrs, + NULL, NULL); +} +#endif /* CONFIG_KVM */ + void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) { uint64_t t; @@ -789,6 +906,7 @@ static void aarch64_host_initfn(Object *obj) if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { aarch64_add_sve_properties(obj); aarch64_add_pauth_properties(obj); + aarch64_add_kvm_writable_properties(obj); } #elif defined(CONFIG_HVF) ARMCPU *cpu = ARM_CPU(obj); diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c new file mode 100644 index 0000000000000000000000000000000000000000..26dda39df65f82f7dd74f8a1b0dadd54c9073352 --- /dev/null +++ b/target/arm/kvm-rme.c @@ -0,0 +1,1029 @@ +/* + * QEMU Arm RME support + * + * Copyright Linaro 2024 + */ + +#include "qemu/osdep.h" + +#include "hw/boards.h" +#include "hw/core/cpu.h" +#include "hw/loader.h" +#include "hw/pci/pci.h" +#include "hw/tpm/tpm_log.h" +#include "kvm_arm.h" +#include "migration/blocker.h" +#include "qapi/error.h" +#include "qemu/base64.h" +#include "qemu/error-report.h" +#include "qemu/units.h" +#include "qom/object_interfaces.h" +#include "exec/confidential-guest-support.h" +#include "sysemu/kvm.h" +#include "sysemu/runstate.h" + +#define TYPE_RME_GUEST "rme-guest" +OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST) + +#define RME_PAGE_SIZE qemu_real_host_page_size() + +#define RME_MEASUREMENT_LOG_SIZE (64 * KiB) + +typedef struct RmeLogFiletype { + uint32_t event_type; + /* Description copied into the log event */ + const char *desc; +} RmeLogFiletype; + +/* + * Realms have a split guest-physical address space: the bottom half is private + * to the realm, and the top half is shared with the host. Within QEMU, we use a + * merged view of both halves. Most of RAM is private to the guest and not + * accessible to us, but the guest shares some pages with us. + * + * For DMA, devices generally target the shared half (top) of the guest address + * space. Only the devices trusted by the guest (using mechanisms like TDISP for + * device authentication) can access the bottom half. + * + * RealmDmaRegion performs remapping of top-half accesses to system memory. 
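+ *
+ * For example, assuming the usual RME encoding where the most significant
+ * IPA bit selects the half, a device DMA access to
+ * (1ULL << (ipa_bits - 1)) | offset in the shared half would be remapped to
+ * system address offset.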
+ */ +struct RealmDmaRegion { + IOMMUMemoryRegion parent_obj; +}; + +#define TYPE_REALM_DMA_REGION "realm-dma-region" +OBJECT_DECLARE_SIMPLE_TYPE(RealmDmaRegion, REALM_DMA_REGION) +OBJECT_DEFINE_SIMPLE_TYPE(RealmDmaRegion, realm_dma_region, + REALM_DMA_REGION, IOMMU_MEMORY_REGION); + +typedef struct RealmPrivateSharedListener { + MemoryRegion *mr; + hwaddr offset_within_region; + uint64_t granularity; + PrivateSharedListener listener; + QLIST_ENTRY(RealmPrivateSharedListener) rpsl_next; +} RealmPrivateSharedListener; + +typedef struct { + hwaddr base; + hwaddr size; + uint8_t *blob_ptr; + RmeLogFiletype *filetype; +} RmeRamRegion; + +struct RmeGuest { + ConfidentialGuestSupport parent_obj; + Notifier rom_load_notifier; + GSList *ram_regions; + + char *personalization_value_str; + uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE]; + RmeGuestMeasurementAlgorithm measurement_algo; + bool use_measurement_log; + + RmeRamRegion init_ram; + uint8_t ipa_bits; + size_t num_cpus; + + RealmDmaRegion *dma_region; + QLIST_HEAD(, RealmPrivateSharedListener) ram_discard_list; + MemoryListener memory_listener; + AddressSpace dma_as; + + TpmLog *log; + GHashTable *images; +}; + +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST, + CONFIDENTIAL_GUEST_SUPPORT, + { TYPE_USER_CREATABLE }, { }) + +typedef struct { + char signature[16]; + char name[32]; + char version[40]; + uint64_t ram_size; + uint32_t num_cpus; + uint64_t flags; +} EventLogVmmVersion; + +typedef struct { + uint32_t id; + uint32_t data_size; + uint8_t data[]; +} EventLogTagged; + +#define EVENT_LOG_TAG_REALM_CREATE 1 +#define EVENT_LOG_TAG_INIT_RIPAS 2 +#define EVENT_LOG_TAG_REC_CREATE 3 + +#define REALM_PARAMS_FLAG_SVE (1 << 1) +#define REALM_PARAMS_FLAG_PMU (1 << 2) + +#define REC_CREATE_FLAG_RUNNABLE (1 << 0) + +static RmeGuest *rme_guest; + +static int rme_init_measurement_log(MachineState *ms) +{ + Object *log; + gpointer filename; + TpmLogDigestAlgo algo; + RmeLogFiletype *filetype; + + if (!rme_guest->use_measurement_log) { + return 0; + } + + switch (rme_guest->measurement_algo) { + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: + algo = TPM_LOG_DIGEST_ALGO_SHA256; + break; + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: + algo = TPM_LOG_DIGEST_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + + log = object_new_with_props(TYPE_TPM_LOG, OBJECT(rme_guest), + "log", &error_fatal, + "digest-algo", TpmLogDigestAlgo_str(algo), + NULL); + + tpm_log_create(TPM_LOG(log), RME_MEASUREMENT_LOG_SIZE, &error_fatal); + rme_guest->log = TPM_LOG(log); + + /* + * Write down the image names we're expecting to encounter when handling the + * ROM load notifications, so we can record the type of image being loaded + * to help the verifier. 
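+ * The table owns both the filename keys and the RmeLogFiletype values
+ * (both registered with g_free destructors), so g_hash_table_destroy()
+ * releases them when the log is closed.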
+ */ + rme_guest->images = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + g_free); + + filename = g_strdup(ms->kernel_filename); + if (filename) { + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_POST_CODE2; + filetype->desc = "KERNEL"; + g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype); + } + + filename = g_strdup(ms->initrd_filename); + if (filename) { + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_POST_CODE2; + filetype->desc = "INITRD"; + g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype); + } + + filename = g_strdup(ms->firmware); + if (filename) { + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2; + filetype->desc = "FIRMWARE"; + g_hash_table_insert(rme_guest->images, filename, filetype); + } + + filename = g_strdup(ms->dtb); + if (!filename) { + filename = g_strdup("dtb"); + } + filetype = g_new0(RmeLogFiletype, 1); + filetype->event_type = TCG_EV_POST_CODE2; + filetype->desc = "DTB"; + g_hash_table_insert(rme_guest->images, filename, filetype); + + return 0; +} + +static int rme_log_event_tag(uint32_t id, uint8_t *data, size_t size, + Error **errp) +{ + int ret; + EventLogTagged event = { + .id = id, + .data_size = size, + }; + GByteArray *bytes = g_byte_array_new(); + + if (!rme_guest->log) { + return 0; + } + + g_byte_array_append(bytes, (uint8_t *)&event, sizeof(event)); + g_byte_array_append(bytes, data, size); + ret = tpm_log_add_event(rme_guest->log, TCG_EV_EVENT_TAG, bytes->data, + bytes->len, NULL, 0, errp); + g_byte_array_free(bytes, true); + return ret; +} + +/* Log VM type and Realm Descriptor create */ +static int rme_log_realm_create(Error **errp) +{ + int ret; + ARMCPU *cpu; + EventLogVmmVersion vmm_version = { + .signature = "VM VERSION", + .name = "QEMU", + .version = QEMU_VERSION, + .ram_size = cpu_to_le64(rme_guest->init_ram.size), + .num_cpus = cpu_to_le32(rme_guest->num_cpus), + .flags = 0, + }; + struct { + uint64_t flags; + uint8_t s2sz; + uint8_t sve_vl; + uint8_t num_bps; + uint8_t num_wps; + uint8_t pmu_num_ctrs; + uint8_t hash_algo; + } params = { + .s2sz = rme_guest->ipa_bits, + }; + + if (!rme_guest->log) { + return 0; + } + + ret = tpm_log_add_event(rme_guest->log, TCG_EV_NO_ACTION, + (uint8_t *)&vmm_version, sizeof(vmm_version), + NULL, 0, errp); + if (ret) { + return ret; + } + + /* With KVM all CPUs have the same capability */ + cpu = ARM_CPU(first_cpu); + if (cpu->has_pmu) { + params.flags |= REALM_PARAMS_FLAG_PMU; + params.pmu_num_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N); + } + + if (cpu->sve_max_vq) { + params.flags |= REALM_PARAMS_FLAG_SVE; + params.sve_vl = cpu->sve_max_vq - 1; + } + params.num_bps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); + params.num_wps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); + + switch (rme_guest->measurement_algo) { + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: + params.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256; + break; + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: + params.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + + return rme_log_event_tag(EVENT_LOG_TAG_REALM_CREATE, (uint8_t *)¶ms, + sizeof(params), errp); +} + +/* unmeasured images are logged with @data == NULL */ +static int rme_log_image(RmeLogFiletype *filetype, uint8_t *data, hwaddr base, + size_t size, Error **errp) +{ + int ret; + size_t desc_size; + GByteArray *event = g_byte_array_new(); + struct 
UefiPlatformFirmwareBlob2Head head = {0}; + struct UefiPlatformFirmwareBlob2Tail tail = {0}; + + if (!rme_guest->log) { + return 0; + } + + if (!filetype) { + error_setg(errp, "cannot log image without a filetype"); + return -1; + } + + /* EV_POST_CODE2 strings are not NUL-terminated */ + desc_size = strlen(filetype->desc); + head.blob_description_size = desc_size; + tail.blob_base = cpu_to_le64(base); + tail.blob_size = cpu_to_le64(size); + + g_byte_array_append(event, (guint8 *)&head, sizeof(head)); + g_byte_array_append(event, (guint8 *)filetype->desc, desc_size); + g_byte_array_append(event, (guint8 *)&tail, sizeof(tail)); + + ret = tpm_log_add_event(rme_guest->log, filetype->event_type, event->data, + event->len, data, size, errp); + g_byte_array_free(event, true); + return ret; +} + +static int rme_log_ripas(hwaddr base, size_t size, Error **errp) +{ + struct { + uint64_t base; + uint64_t size; + } init_ripas = { + .base = cpu_to_le64(base), + .size = cpu_to_le64(size), + }; + + return rme_log_event_tag(EVENT_LOG_TAG_INIT_RIPAS, (uint8_t *)&init_ripas, + sizeof(init_ripas), errp); +} + +static int rme_log_rec(uint64_t flags, uint64_t pc, uint64_t gprs[8], Error **errp) +{ + struct { + uint64_t flags; + uint64_t pc; + uint64_t gprs[8]; + } rec_create = { + .flags = cpu_to_le64(flags), + .pc = cpu_to_le64(pc), + .gprs[0] = cpu_to_le64(gprs[0]), + .gprs[1] = cpu_to_le64(gprs[1]), + .gprs[2] = cpu_to_le64(gprs[2]), + .gprs[3] = cpu_to_le64(gprs[3]), + .gprs[4] = cpu_to_le64(gprs[4]), + .gprs[5] = cpu_to_le64(gprs[5]), + .gprs[6] = cpu_to_le64(gprs[6]), + .gprs[7] = cpu_to_le64(gprs[7]), + }; + + return rme_log_event_tag(EVENT_LOG_TAG_REC_CREATE, (uint8_t *)&rec_create, + sizeof(rec_create), errp); +} + +static int rme_populate_range(hwaddr base, size_t size, bool measure, + Error **errp); + +static int rme_close_measurement_log(Error **errp) +{ + int ret; + hwaddr base; + size_t size; + RmeLogFiletype filetype = { + .event_type = TCG_EV_POST_CODE2, + .desc = "LOG", + }; + + if (!rme_guest->log) { + return 0; + } + + base = object_property_get_uint(OBJECT(rme_guest->log), "load-addr", errp); + if (*errp) { + return -1; + } + + size = object_property_get_uint(OBJECT(rme_guest->log), "max-size", errp); + if (*errp) { + return -1; + } + + /* Log the log itself */ + ret = rme_log_image(&filetype, NULL, base, size, errp); + if (ret) { + return ret; + } + + ret = tpm_log_write_and_close(rme_guest->log, errp); + if (ret) { + return ret; + } + + ret = rme_populate_range(base, size, /* measure */ false, errp); + if (ret) { + return ret; + } + + g_hash_table_destroy(rme_guest->images); + + /* The log is now in the guest. 
Free this object */ + object_unparent(OBJECT(rme_guest->log)); + rme_guest->log = NULL; + return 0; +} + +static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp) +{ + int ret; + const char *cfg_str; + struct arm_rme_config args = { + .cfg = cfg, + }; + + switch (cfg) { + case ARM_RME_CONFIG_RPV: + memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE); + cfg_str = "personalization value"; + break; + case ARM_RME_CONFIG_HASH_ALGO: + switch (guest->measurement_algo) { + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256: + args.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256; + break; + case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512: + args.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512; + break; + default: + g_assert_not_reached(); + } + cfg_str = "hash algorithm"; + break; + default: + g_assert_not_reached(); + } + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_CONFIG_REALM, (intptr_t)&args); + if (ret) { + error_setg_errno(errp, -ret, "failed to configure %s", cfg_str); + } + return ret; +} + +static int rme_configure(Error **errp) +{ + int ret; + size_t option; + const uint32_t config_options[] = { + ARM_RME_CONFIG_RPV, + ARM_RME_CONFIG_HASH_ALGO, + }; + + for (option = 0; option < ARRAY_SIZE(config_options); option++) { + ret = rme_configure_one(rme_guest, config_options[option], errp); + if (ret) { + return ret; + } + } + return 0; +} + +static int rme_init_ram(RmeRamRegion *ram, Error **errp) +{ + int ret; + hwaddr start = QEMU_ALIGN_DOWN(ram->base, RME_PAGE_SIZE); + hwaddr end = QEMU_ALIGN_UP(ram->base + ram->size, RME_PAGE_SIZE); + struct arm_rme_init_ripas init_args = { + .base = start, + .size = end - start, + }; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_INIT_RIPAS_REALM, + (intptr_t)&init_args); + if (ret) { + error_setg_errno(errp, -ret, + "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", + start, end); + return ret; + } + + return rme_log_ripas(ram->base, ram->size, errp); +} + +static int rme_populate_range(hwaddr base, size_t size, bool measure, + Error **errp) +{ + int ret; + hwaddr start = QEMU_ALIGN_DOWN(base, RME_PAGE_SIZE); + hwaddr end = QEMU_ALIGN_UP(base + size, RME_PAGE_SIZE); + struct arm_rme_populate_realm populate_args = { + .base = start, + .size = end - start, + .flags = measure ? KVM_ARM_RME_POPULATE_FLAGS_MEASURE : 0, + }; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, + KVM_CAP_ARM_RME_POPULATE_REALM, + (intptr_t)&populate_args); + if (ret) { + error_setg_errno(errp, -ret, + "failed to populate realm [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")", + start, end); + } + return ret; +} + +static void rme_populate_ram_region(gpointer data, gpointer err) +{ + Error **errp = err; + const RmeRamRegion *region = data; + + if (*errp) { + return; + } + + rme_populate_range(region->base, region->size, /* measure */ true, errp); + if (*errp) { + return; + } + + rme_log_image(region->filetype, region->blob_ptr, region->base, + region->size, errp); +} + +static int rme_init_cpus(Error **errp) +{ + int ret; + CPUState *cs; + bool logged_primary_cpu = false; + + /* + * Now that do_cpu_reset() initialized the boot PC and + * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC. 
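+ *
+ * Finalizing a REC also measures its initial state into the Realm Initial
+ * Measurement, which is why the boot vCPU's PC and GPRs are logged below for
+ * a verifier to replay.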
+ */
+    CPU_FOREACH(cs) {
+        ARMCPU *cpu = ARM_CPU(cs);
+
+        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC);
+        if (ret) {
+            error_setg_errno(errp, -ret, "failed to finalize vCPU");
+            return ret;
+        }
+
+        if (!logged_primary_cpu) {
+            ret = rme_log_rec(REC_CREATE_FLAG_RUNNABLE, cpu->env.pc,
+                              cpu->env.xregs, errp);
+            if (ret) {
+                return ret;
+            }
+
+            logged_primary_cpu = true;
+        }
+    }
+    return 0;
+}
+
+static int rme_create_realm(Error **errp)
+{
+    int ret;
+
+    if (rme_configure(errp)) {
+        return -1;
+    }
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_CREATE_REALM);
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed to create Realm Descriptor");
+        return -1;
+    }
+
+    if (rme_log_realm_create(errp)) {
+        return -1;
+    }
+
+    if (rme_init_ram(&rme_guest->init_ram, errp)) {
+        return -1;
+    }
+
+    g_slist_foreach(rme_guest->ram_regions, rme_populate_ram_region, errp);
+    g_slist_free_full(g_steal_pointer(&rme_guest->ram_regions), g_free);
+    if (*errp) {
+        return -1;
+    }
+
+    if (rme_init_cpus(errp)) {
+        return -1;
+    }
+
+    if (rme_close_measurement_log(errp)) {
+        return -1;
+    }
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_ACTIVATE_REALM);
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed to activate realm");
+        return -1;
+    }
+
+    kvm_mark_guest_state_protected();
+    return 0;
+}
+
+static void rme_vm_state_change(void *opaque, bool running, RunState state)
+{
+    Error *err = NULL;
+
+    if (!running) {
+        return;
+    }
+
+    if (rme_create_realm(&err)) {
+        error_propagate_prepend(&error_fatal, err, "RME: ");
+    }
+}
+
+static char *rme_get_rpv(Object *obj, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    return g_strdup(guest->personalization_value_str);
+}
+
+static void rme_set_rpv(Object *obj, const char *value, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+    g_autofree uint8_t *rpv = NULL;
+    size_t len;
+
+    rpv = qbase64_decode(value, -1, &len, errp);
+    if (!rpv) {
+        return;
+    }
+
+    if (len != sizeof(guest->personalization_value)) {
+        error_setg(errp,
+                   "expecting a Realm Personalization Value of size %zu, got %zu",
+                   sizeof(guest->personalization_value), len);
+        return;
+    }
+    memcpy(guest->personalization_value, rpv, len);
+
+    /* Save the value so we don't need to encode it in the getter */
+    g_free(guest->personalization_value_str);
+    guest->personalization_value_str = g_strdup(value);
+}
+
+static int rme_get_measurement_algo(Object *obj, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    return guest->measurement_algo;
+}
+
+static void rme_set_measurement_algo(Object *obj, int algo, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    guest->measurement_algo = algo;
+}
+
+static bool rme_get_measurement_log(Object *obj, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    return guest->use_measurement_log;
+}
+
+static void rme_set_measurement_log(Object *obj, bool value, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    guest->use_measurement_log = value;
+}
+
+static void rme_guest_class_init(ObjectClass *oc, void *data)
+{
+    object_class_property_add_str(oc, "personalization-value", rme_get_rpv,
+                                  rme_set_rpv);
+    object_class_property_set_description(oc, "personalization-value",
+            "Realm personalization value (64 bytes encoded in base64)");
+
+    object_class_property_add_enum(oc, "measurement-algorithm",
+                                   "RmeGuestMeasurementAlgorithm",
+                                   &RmeGuestMeasurementAlgorithm_lookup,
+                                   rme_get_measurement_algo,
+                                   rme_set_measurement_algo);
+    object_class_property_set_description(oc, "measurement-algorithm",
"Realm measurement algorithm ('sha256', 'sha512')"); + + object_class_property_add_bool(oc, "measurement-log", + rme_get_measurement_log, + rme_set_measurement_log); + object_class_property_set_description(oc, "measurement-log", + "Enable/disable Realm measurement log"); +} + +static void rme_guest_init(Object *obj) +{ + if (rme_guest) { + error_report("a single instance of RmeGuest is supported"); + exit(1); + } + rme_guest = RME_GUEST(obj); + rme_guest->measurement_algo = RME_GUEST_MEASUREMENT_ALGORITHM_SHA512; +} + +static void rme_guest_finalize(Object *obj) +{ + memory_listener_unregister(&rme_guest->memory_listener); +} + +static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b) +{ + const RmeRamRegion *ra = a; + const RmeRamRegion *rb = b; + + g_assert(ra->base != rb->base); + return ra->base < rb->base ? -1 : 1; +} + +static void rme_rom_load_notify(Notifier *notifier, void *data) +{ + RmeRamRegion *region; + RomLoaderNotifyData *rom = data; + + if (rom->addr == -1) { + /* + * These blobs (ACPI tables) are not loaded into guest RAM at reset. + * Instead the firmware will load them via fw_cfg and measure them + * itself. + */ + return; + } + + region = g_new0(RmeRamRegion, 1); + region->base = rom->addr; + region->size = rom->len; + /* + * TODO: double-check lifetime. Is data is still available when we measure + * it, while writing the log. Should be fine since data is kept for the next + * reset. + */ + region->blob_ptr = rom->blob_ptr; + + /* + * rme_guest->images is destroyed after ram_regions, so we can store + * filetype even if we don't own the struct. + */ + if (rme_guest->images) { + region->filetype = g_hash_table_lookup(rme_guest->images, rom->name); + } + + /* + * The Realm Initial Measurement (RIM) depends on the order in which we + * initialize and populate the RAM regions. To help a verifier + * independently calculate the RIM, sort regions by GPA. + */ + rme_guest->ram_regions = g_slist_insert_sorted(rme_guest->ram_regions, + region, + rme_compare_ram_regions); +} + +int kvm_arm_rme_init(MachineState *ms) +{ + static Error *rme_mig_blocker; + ConfidentialGuestSupport *cgs = ms->cgs; + + if (!rme_guest) { + return 0; + } + + if (!cgs) { + error_report("missing -machine confidential-guest-support parameter"); + return -EINVAL; + } + + if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) { + return -ENODEV; + } + + if (rme_init_measurement_log(ms)) { + return -ENODEV; + } + + rme_guest->num_cpus = ms->smp.max_cpus; + + error_setg(&rme_mig_blocker, "RME: migration is not implemented"); + migrate_add_blocker(&rme_mig_blocker, &error_fatal); + + /* + * The realm activation is done last, when the VM starts, after all images + * have been loaded and all vcpus finalized. 
+     */
+    qemu_add_vm_change_state_handler(rme_vm_state_change, NULL);
+
+    rme_guest->rom_load_notifier.notify = rme_rom_load_notify;
+    rom_add_load_notifier(&rme_guest->rom_load_notifier);
+
+    cgs->ready = true;
+    return 0;
+}
+
+void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size)
+{
+    if (rme_guest) {
+        rme_guest->init_ram.base = base;
+        rme_guest->init_ram.size = size;
+    }
+}
+
+int kvm_arm_rme_vcpu_init(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    if (rme_guest) {
+        cpu->kvm_rme = true;
+        cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_REC);
+    }
+    return 0;
+}
+
+int kvm_arm_rme_vm_type(MachineState *ms)
+{
+    if (rme_guest) {
+        return KVM_VM_TYPE_ARM_REALM;
+    }
+    return 0;
+}
+
+static int rme_ram_discard_notify(StateChangeListener *scl,
+                                  MemoryRegionSection *section,
+                                  bool populate)
+{
+    hwaddr gpa, next;
+    IOMMUTLBEvent event;
+    const hwaddr end = section->offset_within_address_space +
+                       int128_get64(section->size);
+    const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1);
+    PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
+    RealmPrivateSharedListener *rpsl = container_of(psl, RealmPrivateSharedListener,
+                                                    listener);
+
+    assert(rme_guest->dma_region != NULL);
+
+    event.type = populate ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = populate ? IOMMU_RW : IOMMU_NONE;
+    event.entry.addr_mask = rpsl->granularity - 1;
+
+    assert(end <= address_mask);
+
+    /*
+     * Create IOMMU mappings from the top half of the address space to the RAM
+     * region.
+     */
+    for (gpa = section->offset_within_address_space; gpa < end; gpa = next) {
+        event.entry.iova = gpa + address_mask + 1;
+        event.entry.translated_addr = gpa;
+        memory_region_notify_iommu(IOMMU_MEMORY_REGION(rme_guest->dma_region),
+                                   0, event);
+
+        next = ROUND_UP(gpa + 1, rpsl->granularity);
+        next = MIN(next, end);
+    }
+
+    return 0;
+}
+
+static int rme_ram_discard_notify_populate(StateChangeListener *scl,
+                                           MemoryRegionSection *section)
+{
+    return rme_ram_discard_notify(scl, section, /* populate */ true);
+}
+
+static int rme_ram_discard_notify_discard(StateChangeListener *scl,
+                                          MemoryRegionSection *section)
+{
+    return rme_ram_discard_notify(scl, section, /* populate */ false);
+}
+
+/* Install a RAM discard listener */
+static void rme_listener_region_add(MemoryListener *listener,
+                                    MemoryRegionSection *section)
+{
+    RealmPrivateSharedListener *rpsl;
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+
+    if (!gsm) {
+        return;
+    }
+
+    rpsl = g_new0(RealmPrivateSharedListener, 1);
+    rpsl->mr = section->mr;
+    rpsl->offset_within_region = section->offset_within_region;
+    rpsl->granularity = generic_state_manager_get_min_granularity(gsm,
+                                                                  section->mr);
+    QLIST_INSERT_HEAD(&rme_guest->ram_discard_list, rpsl, rpsl_next);
+
+    private_shared_listener_init(&rpsl->listener,
+                                 rme_ram_discard_notify_populate,
+                                 rme_ram_discard_notify_discard, true);
+    generic_state_manager_register_listener(gsm, &rpsl->listener.scl, section);
+}
+
+static void rme_listener_region_del(MemoryListener *listener,
+                                    MemoryRegionSection *section)
+{
+    RealmPrivateSharedListener *rpsl;
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+
+    if (!gsm) {
+        return;
+    }
+
+    QLIST_FOREACH(rpsl, &rme_guest->ram_discard_list, rpsl_next) {
+        if (MEMORY_REGION(rpsl->mr) == section->mr &&
+            rpsl->offset_within_region == section->offset_within_region) {
+            generic_state_manager_unregister_listener(gsm,
&rpsl->listener.scl); + g_free(rpsl); + break; + } + } +} + +static AddressSpace *rme_dma_get_address_space(PCIBus *bus, void *opaque, + int devfn) +{ + return &rme_guest->dma_as; +} + +static const PCIIOMMUOps rme_dma_ops = { + .get_address_space = rme_dma_get_address_space, +}; + +void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus) +{ + RealmDmaRegion *dma_region; + const unsigned int ipa_bits = 64 - clz64(highest_gpa) + 1; + + if (!rme_guest) { + return; + } + + assert(ipa_bits < 64); + + /* + * Setup a DMA translation from the shared top half of the guest-physical + * address space to our merged view of RAM. + */ + dma_region = g_new0(RealmDmaRegion, 1); + + memory_region_init_iommu(dma_region, sizeof(*dma_region), + TYPE_REALM_DMA_REGION, OBJECT(rme_guest), + "realm-dma-region", 1ULL << ipa_bits); + address_space_init(&rme_guest->dma_as, MEMORY_REGION(dma_region), + TYPE_REALM_DMA_REGION); + rme_guest->dma_region = dma_region; + + pci_setup_iommu(pci_bus, &rme_dma_ops, NULL); + + /* + * Install notifiers to forward RAM discard changes to the IOMMU notifiers + * (ie. tell VFIO to map shared pages and unmap private ones). + */ + rme_guest->memory_listener = (MemoryListener) { + .name = "rme", + .region_add = rme_listener_region_add, + .region_del = rme_listener_region_del, + }; + memory_listener_register(&rme_guest->memory_listener, + &address_space_memory); + + rme_guest->ipa_bits = ipa_bits; +} + +static void realm_dma_region_init(Object *obj) +{ +} + +static IOMMUTLBEntry realm_dma_region_translate(IOMMUMemoryRegion *mr, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1); + IOMMUTLBEntry entry = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = addr & address_mask, + .addr_mask = address_mask, + .perm = IOMMU_RW, + }; + + return entry; +} + +static void realm_dma_region_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) +{ + /* Nothing is shared at boot */ +} + +static void realm_dma_region_finalize(Object *obj) +{ +} + +static void realm_dma_region_class_init(ObjectClass *oc, void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(oc); + + imrc->translate = realm_dma_region_translate; + imrc->replay = realm_dma_region_replay; +} + +Object *kvm_arm_rme_get_measurement_log(void) +{ + if (rme_guest && rme_guest->log) { + return OBJECT(rme_guest->log); + } + return NULL; +} diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c index d18ac1089647fbf035a896c469e6d2901dda3ce7..d6dc8342c4deafb630024a4fce7a11c8808b3461 100644 --- a/target/arm/kvm-tmm.c +++ b/target/arm/kvm-tmm.c @@ -118,7 +118,7 @@ static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp) g_assert_not_reached(); } - ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, KVM_CAP_ARM_TMM_CONFIG_CVM, (intptr_t)&args); if (ret) { error_setg_errno(errp, -ret, "TMM: failed to configure %s", cfg_str); @@ -167,7 +167,7 @@ static void tmm_populate_region(gpointer data, gpointer unused) return; } - ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, KVM_CAP_ARM_TMM_POPULATE_CVM, (intptr_t)&populate_args); if (ret) { @@ -179,7 +179,7 @@ static void tmm_populate_region(gpointer data, gpointer unused) static int tmm_create_rd(Error **errp) { - int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, + int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, 
KVM_CAP_ARM_TMM_CREATE_RD); if (ret) { error_setg_errno(errp, -ret, "TMM: failed to create tmm Descriptor"); @@ -200,14 +200,14 @@ static void tmm_vm_state_change(void *opaque, bool running, RunState state) g_slist_free_full(g_steal_pointer(&tmm_guest->ram_regions), g_free); CPU_FOREACH(cs) { - ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_TEC); + ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC); if (ret) { error_report("TMM: failed to finalize vCPU: %s", strerror(-ret)); exit(1); } } - ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0, + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0, KVM_CAP_ARM_TMM_ACTIVATE_CVM); if (ret) { error_report("TMM: failed to activate cvm: %s", strerror(-ret)); @@ -224,7 +224,7 @@ int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp) return -ENODEV; } - if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_TMM)) { + if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) { error_setg(errp, "KVM does not support TMM"); return -ENODEV; } diff --git a/target/arm/kvm.c b/target/arm/kvm.c index ab31515a2af6500374ec9676117c162e13692365..f45783a9dafe76c81e3411fcf35d7b455db3d1e4 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -73,6 +73,7 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, { int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1; int max_vm_pa_size; + int vm_type; kvmfd = qemu_open_old("/dev/kvm", O_RDWR); if (kvmfd < 0) { @@ -82,8 +83,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, if (max_vm_pa_size < 0) { max_vm_pa_size = 0; } + vm_type = kvm_arm_rme_vm_type(MACHINE(qdev_get_machine())); do { - vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); + vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size | vm_type); } while (vmfd == -1 && errno == EINTR); if (vmfd < 0) { goto err; @@ -276,7 +278,7 @@ static void kvm_update_ipiv_cap(KVMState *s) int kvm_arch_init(MachineState *ms, KVMState *s) { MachineClass *mc = MACHINE_GET_CLASS(ms); - int ret = 0; + int ret; /* For ARM interrupt delivery is always asynchronous, * whether we are using an in-kernel VGIC or not. 
@@ -295,7 +297,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { error_report("Using more than 256 vcpus requires a host kernel " "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); - ret = -EINVAL; + return -EINVAL; } if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) { @@ -317,13 +319,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) warn_report("Eager Page Split support not available"); } else if (!(s->kvm_eager_split_size & sizes)) { error_report("Eager Page Split requested chunk size not valid"); - ret = -EINVAL; + return -EINVAL; } else { ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, s->kvm_eager_split_size); if (ret < 0) { error_report("Enabling of Eager Page Split failed: %s", strerror(-ret)); + return ret; } } } @@ -348,6 +351,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s) kvm_arm_init_debug(s); kvm_update_ipiv_cap(s); + ret = kvm_arm_rme_init(ms); + if (ret) { + error_report("Failed to enable RME: %s", strerror(-ret)); + } + return ret; } @@ -673,6 +681,86 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu) } } +static void kvm_arm_configure_aa64dfr0(ARMCPU *cpu) +{ + int ret; + uint64_t val, newval; + CPUState *cs = CPU(cpu); + + if (!cpu->num_bps && !cpu->num_wps) { + return; + } + + newval = cpu->isar.id_aa64dfr0; + if (cpu->num_bps) { + uint64_t ctx_cmps = FIELD_EX64(newval, ID_AA64DFR0, CTX_CMPS); + + /* CTX_CMPs is never greater than BRPs */ + ctx_cmps = MIN(ctx_cmps, cpu->num_bps - 1); + newval = FIELD_DP64(newval, ID_AA64DFR0, BRPS, cpu->num_bps - 1); + newval = FIELD_DP64(newval, ID_AA64DFR0, CTX_CMPS, ctx_cmps); + } + if (cpu->num_wps) { + newval = FIELD_DP64(newval, ID_AA64DFR0, WRPS, cpu->num_wps - 1); + } + ret = kvm_set_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &newval); + if (ret) { + error_report("Failed to set KVM_REG_ARM_ID_AA64DFR0_EL1"); + return; + } + + /* + * Check if the write succeeded. KVM does offer the writable mask for this + * register, but this way we also check if the value we wrote was sane. + */ + ret = kvm_get_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &val); + if (ret) { + error_report("Failed to get KVM_REG_ARM_ID_AA64DFR0_EL1"); + return; + } + + if (val != newval) { + error_report("Failed to update KVM_REG_ARM_ID_AA64DFR0_EL1"); + } +} + +static void kvm_arm_configure_pmcr(ARMCPU *cpu) +{ + int ret; + uint64_t val, newval; + CPUState *cs = CPU(cpu); + + if (cpu->num_pmu_ctrs == -1) { + return; + } + + newval = FIELD_DP64(cpu->isar.reset_pmcr_el0, PMCR, N, cpu->num_pmu_ctrs); + ret = kvm_set_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &newval); + if (ret) { + error_report("Failed to set KVM_REG_ARM_PMCR_EL0"); + return; + } + + /* + * Check if the write succeeded, since older versions of KVM ignore it. 
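+     * (PMCR_EL0.N holds the number of implemented event counters, which is
+     * what the num-pmu-counters property restricts.)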
+ */ + ret = kvm_get_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &val); + if (ret) { + error_report("Failed to get KVM_REG_ARM_PMCR_EL0"); + return; + } + + if (val != newval) { + error_report("Failed to update KVM_REG_ARM_PMCR_EL0"); + } +} + +static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu) +{ + kvm_arm_configure_aa64dfr0(cpu); + kvm_arm_configure_pmcr(cpu); +} + void kvm_arm_reset_vcpu(ARMCPU *cpu) { int ret; @@ -686,6 +774,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); abort(); } + + /* + * Before loading the KVM values into CPUState, update the KVM configuration + */ + kvm_arm_configure_vcpu_regs(cpu); + if (!write_kvmstate_to_list(cpu)) { fprintf(stderr, "write_kvmstate_to_list failed\n"); abort(); diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index b099287ed0645d466604456236171c6b1dc3dd52..6a8aad0f066ae17a8b6b5e37ff15c69c3759ebcf 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -39,11 +39,11 @@ void kvm_arm_init_debug(KVMState *s) have_guest_debug = kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG); - max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); + max_hw_wps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); hw_watchpoints = g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps); - max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); + max_hw_bps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); hw_breakpoints = g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps); return; @@ -338,7 +338,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, ARM64_SYS_REG(3, 0, 0, 4, 5)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, - ARM64_SYS_REG(3, 0, 0, 5, 0)); + KVM_REG_ARM_ID_AA64DFR0_EL1); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, ARM64_SYS_REG(3, 0, 0, 5, 1)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, @@ -438,7 +438,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) if (pmu_supported) { /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, - ARM64_SYS_REG(3, 3, 9, 12, 0)); + KVM_REG_ARM_PMCR_EL0); } if (sve_supported) { @@ -513,12 +513,12 @@ bool kvm_arm_aarch32_supported(void) bool kvm_arm_sve_supported(void) { - return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); + return kvm_vm_check_extension(kvm_state, KVM_CAP_ARM_SVE); } bool kvm_arm_steal_time_supported(void) { - return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); + return kvm_vm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); } QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); @@ -646,6 +646,11 @@ int kvm_arch_init_vcpu(CPUState *cs) 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); } + ret = kvm_arm_rme_vcpu_init(cs); + if (ret) { + return ret; + } + /* Do KVM_ARM_VCPU_INIT ioctl */ ret = kvm_arm_vcpu_init(cs); if (ret) { @@ -838,7 +843,30 @@ static int kvm_arch_put_sve(CPUState *cs) return 0; } -int kvm_arch_put_registers(CPUState *cs, int level) +static int kvm_arm_rme_put_core_regs(CPUState *cs) +{ + int i, ret; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + /* The RME ABI only allows us to set 8 GPRs and the PC */ + for (i = 0; i < 8; i++) { + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); + if (ret) { + return ret; + } + } + + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); + if (ret) { + return ret; + } + + return 0; +} + +static 
int kvm_arm_put_core_regs(CPUState *cs, int level)
 {
     uint64_t val;
     uint32_t fpr;
@@ -848,6 +876,10 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
 
+    if (cpu->kvm_rme) {
+        return kvm_arm_rme_put_core_regs(cs);
+    }
+
     /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
      * AArch64 registers before pushing them out to 64-bit KVM.
      */
@@ -941,6 +973,19 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return ret;
     }
 
+    return 0;
+}
+
+int kvm_arch_put_registers(CPUState *cs, int level)
+{
+    int ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    ret = kvm_arm_put_core_regs(cs, level);
+    if (ret) {
+        return ret;
+    }
+
     write_cpustate_to_list(cpu, true);
 
     if (!write_list_to_kvmstate(cpu, level)) {
@@ -1024,7 +1069,24 @@ static int kvm_arch_get_sve(CPUState *cs)
     return 0;
 }
 
-int kvm_arch_get_registers(CPUState *cs)
+static int kvm_arm_rme_get_core_regs(CPUState *cs)
+{
+    int i, ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    for (i = 0; i < 8; i++) {
+        ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
+                              &env->xregs[i]);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int kvm_arm_get_core_regs(CPUState *cs)
 {
     uint64_t val;
     unsigned int el;
@@ -1034,6 +1096,10 @@ int kvm_arch_get_registers(CPUState *cs)
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
 
+    if (cpu->kvm_rme) {
+        return kvm_arm_rme_get_core_regs(cs);
+    }
+
     for (i = 0; i < 31; i++) {
         ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
                               &env->xregs[i]);
@@ -1127,6 +1193,19 @@ int kvm_arch_get_registers(CPUState *cs)
     }
     vfp_set_fpcr(env, fpr);
 
+    return 0;
+}
+
+int kvm_arch_get_registers(CPUState *cs)
+{
+    int ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    ret = kvm_arm_get_core_regs(cs);
+    if (ret) {
+        return ret;
+    }
+
     ret = kvm_get_vcpu_events(cpu);
     if (ret) {
         return ret;
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index a29d4548f4ce39a767ac59c694beda3ed7b61506..8e9b2039c4dcad02c88ba1b6807b0c83aadb4d4d 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -18,6 +18,9 @@
 #define KVM_ARM_VGIC_V2 (1 << 0)
 #define KVM_ARM_VGIC_V3 (1 << 1)
 
+#define KVM_REG_ARM_ID_AA64DFR0_EL1 ARM64_SYS_REG(3, 0, 0, 5, 0)
+#define KVM_REG_ARM_PMCR_EL0 ARM64_SYS_REG(3, 3, 9, 12, 0)
+
 /**
  * kvm_arm_init_debug() - initialize guest debug capabilities
  * @s: KVMState
@@ -38,20 +41,6 @@ void kvm_arm_init_debug(KVMState *s);
  */
 int kvm_arm_vcpu_init(CPUState *cs);
 
-/**
- * kvm_arm_vcpu_finalize:
- * @cs: CPUState
- * @feature: feature to finalize
- *
- * Finalizes the configuration of the specified VCPU feature by
- * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring
- * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of
- * KVM's API documentation.
- *
- * Returns: 0 if success else < 0 error code
- */
-int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
-
 /**
  * kvm_arm_register_device:
  * @mr: memory region for this device
@@ -285,6 +274,14 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu);
  */
 void kvm_arm_add_vcpu_properties(Object *obj);
 
+/**
+ * kvm_arm_vcpu_finalize:
+ * @cs: CPUState
+ * @feature: a KVM_ARM_VCPU_* feature
+ *
+ * Finalize the configuration of the given vcpu feature.
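+ *
+ * Returns: 0 on success, a negative error code otherwise.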
+ */
+int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
+
 /**
  * kvm_arm_steal_time_finalize:
  * @cpu: ARMCPU for which to finalize kvm-steal-time
@@ -408,6 +405,62 @@ bool kvm_arm_tmm_enabled(void);
  */
 int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction);
 
+/**
+ * kvm_arm_rme_init:
+ * @ms: the machine state
+ *
+ * Prepare the machine to be a Realm, if the user enabled it.
+ */
+int kvm_arm_rme_init(MachineState *ms);
+
+/**
+ * kvm_arm_rme_vm_type:
+ * @ms: the machine state
+ *
+ * Returns the Realm KVM VM type if the user requested a Realm, 0 otherwise.
+ */
+int kvm_arm_rme_vm_type(MachineState *ms);
+
+/**
+ * kvm_arm_rme_vcpu_init:
+ * @cs: the CPU
+ *
+ * If the user requested a Realm, set up the given vCPU accordingly. Realm
+ * vCPUs behave a little differently, for example most of their register state
+ * is hidden from the host.
+ */
+int kvm_arm_rme_vcpu_init(CPUState *cs);
+
+/**
+ * kvm_arm_rme_init_guest_ram:
+ * @base: base address of RAM
+ * @size: size of RAM
+ *
+ * If the user requested a Realm, set the base and size of guest RAM, in order
+ * to initialize the Realm IPA space.
+ */
+void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size);
+
+/**
+ * kvm_arm_rme_init_gpa_space:
+ * @highest_gpa: highest address of the lower half of the guest address space
+ * @pci_bus: the main PCI bus, whose devices obtain their DMA address space
+ *           from the Realm DMA region
+ *
+ * Set up the guest-physical address space for a Realm. Install a memory
+ * region and notifier to manage the shared upper half of the address space.
+ */
+void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus);
+
+/**
+ * kvm_arm_rme_get_measurement_log:
+ *
+ * Obtain the measurement log object if enabled, in order to get its size and
+ * set its base address.
+ *
+ * Returns NULL if the measurement log is disabled.
+ */
+Object *kvm_arm_rme_get_measurement_log(void);
+
 #else
 
 /*
@@ -434,6 +487,20 @@ static inline bool kvm_arm_steal_time_supported(void)
     return false;
 }
 
+static inline void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size)
+{
+}
+
+static inline void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa,
+                                              PCIBus *pci_bus)
+{
+}
+
+static inline Object *kvm_arm_rme_get_measurement_log(void)
+{
+    return NULL;
+}
+
 /*
  * These functions should never actually be called without KVM support.
*/ @@ -447,6 +514,11 @@ static inline void kvm_arm_add_vcpu_properties(Object *obj) g_assert_not_reached(); } +static inline int kvm_arm_vcpu_finalize(CPUState *cs, int feature) +{ + g_assert_not_reached(); +} + static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) { g_assert_not_reached(); @@ -512,6 +584,22 @@ static inline int tmm_get_kae_num(void) { g_assert_not_reached(); } + +static inline int kvm_arm_rme_init(MachineState *ms) +{ + g_assert_not_reached(); +} + +static inline int kvm_arm_rme_vm_type(MachineState *ms) +{ + g_assert_not_reached(); +} + +static inline int kvm_arm_rme_vcpu_init(CPUState *cs) +{ + g_assert_not_reached(); +} + #endif /** diff --git a/target/arm/meson.build b/target/arm/meson.build index 389ee5465882e31718e27dedd878036eeae2ea8a..7973b35ccacf6097b528e2f501cfba416053c1d4 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -8,7 +8,7 @@ arm_ss.add(files( )) arm_ss.add(zlib) -arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c')) +arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c', 'kvm-rme.c'), if_false: files('kvm-stub.c')) arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) arm_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c', 'kvm64.c', 'kvm-tmm.c'), if_false: files('kvm-stub.c')) diff --git a/target/i386/sev.c b/target/i386/sev.c index b4b42fd71650c05751983cd87ccf905abd9ea1bb..8c1f4d653ef4b8ec3bd792adabfd9cea98cb2f82 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -936,6 +936,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) if (ret) { exit(1); } + kvm_mark_guest_state_protected(); } /* query the measurement blob length */